/*
 * Copyright (c) 2017, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package jdk.incubator.vector;

import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.ByteOrder;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.Function;

import jdk.internal.foreign.AbstractMemorySegmentImpl;
import jdk.internal.misc.ScopedMemoryAccess;
import jdk.internal.misc.Unsafe;
import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.vector.VectorSupport;

import static jdk.internal.vm.vector.VectorSupport.*;
import static jdk.incubator.vector.VectorIntrinsics.*;

import static jdk.incubator.vector.VectorOperators.*;

#warn This file is preprocessed before being compiled

/**
 * A specialized {@link Vector} representing an ordered immutable sequence of
 * {@code $type$} values.
 */
@SuppressWarnings("cast")  // warning: redundant cast
public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {

    /**
     * Forwards the given {@code $type$} array to the superclass constructor.
     *
     * @param vec the array passed through to {@code AbstractVector}
     */
    $abstractvectortype$($type$[] vec) {
        super(vec);
    }

#if[FP]
    // Passed as the last argument to VectorOperators.opCode by the opCode
    // helpers below; the floating-point variant uses VO_NOFP.
    static final int FORBID_OPCODE_KIND = VO_NOFP;
#else[FP]
    // Passed as the last argument to VectorOperators.opCode by the opCode
    // helpers below; the integral variant uses VO_ONLYFP.
    static final int FORBID_OPCODE_KIND = VO_ONLYFP;
#end[FP]

    // Element layout with byte alignment 1; used by memorySegmentGet and
    // memorySegmentSet for per-lane memory segment access.
    static final ValueLayout.Of$Type$ ELEMENT_LAYOUT = ValueLayout.JAVA_$TYPE$.withByteAlignment(1);

    /**
     * Resolves the opcode of {@code op} via {@code VectorOperators.opCode},
     * requiring only {@code VO_OPCODE_VALID} and passing
     * {@code FORBID_OPCODE_KIND} as the forbidden kind.
     */
    @ForceInline
    static int opCode(Operator op) {
        final int requireKind = VO_OPCODE_VALID;
        return VectorOperators.opCode(op, requireKind, FORBID_OPCODE_KIND);
    }
    /**
     * Resolves the opcode of {@code op} via {@code VectorOperators.opCode},
     * requiring {@code requireKind} in addition to {@code VO_OPCODE_VALID}.
     */
    @ForceInline
    static int opCode(Operator op, int requireKind) {
        final int required = requireKind | VO_OPCODE_VALID;
        return VectorOperators.opCode(op, required, FORBID_OPCODE_KIND);
    }
    /**
     * Delegates to {@code VectorOperators.opKind} for the given operator
     * and kind bit.
     */
    @ForceInline
    static boolean opKind(Operator op, int bit) {
        final boolean hasKind = VectorOperators.opKind(op, bit);
        return hasKind;
    }

    // Virtualized factories and operators,
    // coded with portable definitions.
    // These are all @ForceInline in case
    // they need to be used performantly.
    // The various shape-specific subclasses
    // also specialize them by wrapping
    // them in a call like this:
    //    return (Byte128Vector)
    //       super.bOp((Byte128Vector) o);
    // The purpose of that is to forcibly inline
    // the generic definition from this file
    // into a sharply-typed and size-specific
    // wrapper in the subclass file, so that
    // the JIT can specialize the code.
    // The code is only inlined and expanded
    // if it gets hot.  Think of it as a cheap
    // and lazy version of C++ templates.

    // Virtualized getter

    /**
     * Returns the {@code $type$} array holding this vector's lane values;
     * the templates in this file index it by lane number.
     * NOTE(review): presumably the internal payload rather than a copy;
     * callers in this file only read from it.
     */
    /*package-private*/
    abstract $type$[] vec();

    // Virtualized constructors

    /**
     * Build a vector directly using my own constructor.
     * It is an error if the array is aliased elsewhere.
     *
     * @param vec the lane values for the new vector; must not be shared
     * @return a vector wrapping {@code vec}
     */
    /*package-private*/
    abstract $abstractvectortype$ vectorFactory($type$[] vec);

    /**
     * Creates a mask from the given bits by asking this vector's species.
     * The array must not be aliased elsewhere.
     */
    /*package-private*/
    @ForceInline
    final
    AbstractMask<$Boxtype$> maskFactory(boolean[] bits) {
        final $Type$Species vsp = vspecies();
        return vsp.maskFactory(bits);
    }

    // Constant loader (takes dummy as vector arg)
    // Callback consumed by vOp: given a lane index i, returns the value
    // to store in that lane.
    interface FVOp {
        $type$ apply(int i);
    }

    /**
     * Produces a new vector whose lane {@code i} holds {@code f.apply(i)}.
     * The lanes of this vector are not read; only its length is used.
     */
    /*package-private*/
    @ForceInline
    final
    $abstractvectortype$ vOp(FVOp f) {
        final $type$[] out = new $type$[length()];
        for (int lane = 0; lane < out.length; lane++) {
            out[lane] = f.apply(lane);
        }
        return vectorFactory(out);
    }

    /**
     * Masked variant of {@code vOp}: lanes selected by {@code m} receive
     * {@code f.apply(i)}; unselected lanes keep the array default (zero).
     */
    @ForceInline
    final
    $abstractvectortype$ vOp(VectorMask<$Boxtype$> m, FVOp f) {
        final $type$[] out = new $type$[length()];
        final boolean[] selected = ((AbstractMask<$Boxtype$>)m).getBits();
        for (int lane = 0; lane < out.length; lane++) {
            if (!selected[lane]) {
                continue;
            }
            out[lane] = f.apply(lane);
        }
        return vectorFactory(out);
    }

    // Unary operator

    // Callback consumed by the uOp templates: given the lane index i and
    // that lane's value a, returns the result for the lane.
    /*package-private*/
    interface FUnOp {
        $type$ apply(int i, $type$ a);
    }

    /*package-private*/
    abstract
    $abstractvectortype$ uOp(FUnOp f);

    /**
     * Applies {@code f} to every lane of this vector and returns the
     * results as a new vector.
     */
    @ForceInline
    final
    $abstractvectortype$ uOpTemplate(FUnOp f) {
        final $type$[] lanes = vec();
        final $type$[] out = new $type$[length()];
        for (int lane = 0; lane < out.length; lane++) {
            out[lane] = f.apply(lane, lanes[lane]);
        }
        return vectorFactory(out);
    }

    /*package-private*/
    abstract
    $abstractvectortype$ uOp(VectorMask<$Boxtype$> m,
                             FUnOp f);

    /**
     * Masked unary template: lanes selected by {@code m} receive
     * {@code f.apply(i, a)}, all other lanes are copied from this vector.
     * A {@code null} mask falls back to the unmasked template.
     */
    @ForceInline
    final
    $abstractvectortype$ uOpTemplate(VectorMask<$Boxtype$> m,
                                     FUnOp f) {
        if (m == null) {
            return uOpTemplate(f);
        }
        final $type$[] lanes = vec();
        final $type$[] out = new $type$[length()];
        final boolean[] selected = ((AbstractMask<$Boxtype$>)m).getBits();
        for (int lane = 0; lane < out.length; lane++) {
            if (selected[lane]) {
                out[lane] = f.apply(lane, lanes[lane]);
            } else {
                out[lane] = lanes[lane];
            }
        }
        return vectorFactory(out);
    }

    // Binary operator

    // Callback consumed by the bOp and rOp templates: given the lane index i
    // and two operand values a and b, returns the combined value.
    /*package-private*/
    interface FBinOp {
        $type$ apply(int i, $type$ a, $type$ b);
    }

    /*package-private*/
    abstract
    $abstractvectortype$ bOp(Vector<$Boxtype$> o,
                             FBinOp f);

    /**
     * Combines each lane of this vector with the matching lane of
     * {@code o} through {@code f} and returns the results as a new vector.
     */
    @ForceInline
    final
    $abstractvectortype$ bOpTemplate(Vector<$Boxtype$> o,
                                     FBinOp f) {
        final $type$[] out = new $type$[length()];
        final $type$[] lhs = this.vec();
        final $type$[] rhs = (($abstractvectortype$)o).vec();
        for (int lane = 0; lane < out.length; lane++) {
            out[lane] = f.apply(lane, lhs[lane], rhs[lane]);
        }
        return vectorFactory(out);
    }

    /*package-private*/
    abstract
    $abstractvectortype$ bOp(Vector<$Boxtype$> o,
                             VectorMask<$Boxtype$> m,
                             FBinOp f);

    /**
     * Masked binary template: lanes selected by {@code m} receive
     * {@code f.apply(i, a, b)}, all other lanes are copied from this vector.
     * A {@code null} mask falls back to the unmasked template.
     */
    @ForceInline
    final
    $abstractvectortype$ bOpTemplate(Vector<$Boxtype$> o,
                                     VectorMask<$Boxtype$> m,
                                     FBinOp f) {
        if (m == null) {
            return bOpTemplate(o, f);
        }
        final $type$[] out = new $type$[length()];
        final $type$[] lhs = this.vec();
        final $type$[] rhs = (($abstractvectortype$)o).vec();
        final boolean[] selected = ((AbstractMask<$Boxtype$>)m).getBits();
        for (int lane = 0; lane < out.length; lane++) {
            if (selected[lane]) {
                out[lane] = f.apply(lane, lhs[lane], rhs[lane]);
            } else {
                out[lane] = lhs[lane];
            }
        }
        return vectorFactory(out);
    }

    // Ternary operator

    // Callback consumed by the tOp templates: given the lane index i and
    // three operand values a, b and c, returns the result for the lane.
    /*package-private*/
    interface FTriOp {
        $type$ apply(int i, $type$ a, $type$ b, $type$ c);
    }

    /*package-private*/
    abstract
    $abstractvectortype$ tOp(Vector<$Boxtype$> o1,
                             Vector<$Boxtype$> o2,
                             FTriOp f);

    /**
     * Combines each lane of this vector with the matching lanes of
     * {@code o1} and {@code o2} through {@code f} and returns the results
     * as a new vector.
     */
    @ForceInline
    final
    $abstractvectortype$ tOpTemplate(Vector<$Boxtype$> o1,
                                     Vector<$Boxtype$> o2,
                                     FTriOp f) {
        final $type$[] out = new $type$[length()];
        final $type$[] first = this.vec();
        final $type$[] second = (($abstractvectortype$)o1).vec();
        final $type$[] third = (($abstractvectortype$)o2).vec();
        for (int lane = 0; lane < out.length; lane++) {
            out[lane] = f.apply(lane, first[lane], second[lane], third[lane]);
        }
        return vectorFactory(out);
    }

    /*package-private*/
    abstract
    $abstractvectortype$ tOp(Vector<$Boxtype$> o1,
                             Vector<$Boxtype$> o2,
                             VectorMask<$Boxtype$> m,
                             FTriOp f);

    /**
     * Masked ternary template: lanes selected by {@code m} receive
     * {@code f.apply(i, a, b, c)}, all other lanes are copied from this
     * vector.  A {@code null} mask falls back to the unmasked template.
     */
    @ForceInline
    final
    $abstractvectortype$ tOpTemplate(Vector<$Boxtype$> o1,
                                     Vector<$Boxtype$> o2,
                                     VectorMask<$Boxtype$> m,
                                     FTriOp f) {
        if (m == null) {
            return tOpTemplate(o1, o2, f);
        }
        final $type$[] out = new $type$[length()];
        final $type$[] first = this.vec();
        final $type$[] second = (($abstractvectortype$)o1).vec();
        final $type$[] third = (($abstractvectortype$)o2).vec();
        final boolean[] selected = ((AbstractMask<$Boxtype$>)m).getBits();
        for (int lane = 0; lane < out.length; lane++) {
            if (selected[lane]) {
                out[lane] = f.apply(lane, first[lane], second[lane], third[lane]);
            } else {
                out[lane] = first[lane];
            }
        }
        return vectorFactory(out);
    }

    // Reduction operator

    /*package-private*/
    abstract
    $type$ rOp($type$ v, VectorMask<$Boxtype$> m, FBinOp f);

    /**
     * Masked reduction template: starting from {@code v}, folds every lane
     * selected by {@code m} into the accumulator with {@code f}, in lane
     * order.  A {@code null} mask falls back to the unmasked template.
     */
    @ForceInline
    final
    $type$ rOpTemplate($type$ v, VectorMask<$Boxtype$> m, FBinOp f) {
        if (m == null) {
            return rOpTemplate(v, f);
        }
        final $type$[] lanes = vec();
        final boolean[] selected = ((AbstractMask<$Boxtype$>)m).getBits();
        $type$ acc = v;
        for (int lane = 0; lane < lanes.length; lane++) {
            if (selected[lane]) {
                acc = f.apply(lane, acc, lanes[lane]);
            }
        }
        return acc;
    }

    /**
     * Unmasked reduction template: starting from {@code v}, folds every
     * lane into the accumulator with {@code f}, in lane order.
     */
    @ForceInline
    final
    $type$ rOpTemplate($type$ v, FBinOp f) {
        final $type$[] lanes = vec();
        $type$ acc = v;
        for (int lane = 0; lane < lanes.length; lane++) {
            acc = f.apply(lane, acc, lanes[lane]);
        }
        return acc;
    }

    // Memory reference

    // Callback consumed by ldOp: given the memory object, the int base
    // offset and the lane index i, returns the value loaded for that lane.
    /*package-private*/
    interface FLdOp<M> {
        $type$ apply(M memory, int offset, int i);
    }

    /**
     * Builds a new vector whose lane {@code i} holds
     * {@code f.apply(memory, offset, i)}.
     */
    /*package-private*/
    @ForceInline
    final
    <M> $abstractvectortype$ ldOp(M memory, int offset,
                                  FLdOp<M> f) {
        // The receiver's own lanes are not read; only its length is used.
        final $type$[] out = new $type$[length()];
        for (int lane = 0; lane < out.length; lane++) {
            out[lane] = f.apply(memory, offset, lane);
        }
        return vectorFactory(out);
    }

    /**
     * Masked load template: lanes selected by {@code m} receive
     * {@code f.apply(memory, offset, i)}; unselected lanes keep the array
     * default (zero) and {@code f} is not invoked for them.
     */
    /*package-private*/
    @ForceInline
    final
    <M> $abstractvectortype$ ldOp(M memory, int offset,
                                  VectorMask<$Boxtype$> m,
                                  FLdOp<M> f) {
        final $type$[] out = new $type$[length()];
        final boolean[] selected = ((AbstractMask<$Boxtype$>)m).getBits();
        for (int lane = 0; lane < out.length; lane++) {
            if (!selected[lane]) {
                continue;
            }
            out[lane] = f.apply(memory, offset, lane);
        }
        return vectorFactory(out);
    }

    // Callback consumed by ldLongOp: given the memory segment, the long
    // base offset and the lane index i, returns the value loaded for that lane.
    /*package-private*/
    interface FLdLongOp {
        $type$ apply(MemorySegment memory, long offset, int i);
    }

    /**
     * Builds a new vector whose lane {@code i} holds
     * {@code f.apply(memory, offset, i)}, for memory segments addressed
     * with a {@code long} offset.
     */
    /*package-private*/
    @ForceInline
    final
    $abstractvectortype$ ldLongOp(MemorySegment memory, long offset,
                                  FLdLongOp f) {
        // The receiver's own lanes are not read; only its length is used.
        final $type$[] out = new $type$[length()];
        for (int lane = 0; lane < out.length; lane++) {
            out[lane] = f.apply(memory, offset, lane);
        }
        return vectorFactory(out);
    }

    /**
     * Masked memory-segment load template: lanes selected by {@code m}
     * receive {@code f.apply(memory, offset, i)}; unselected lanes keep the
     * array default (zero) and {@code f} is not invoked for them.
     */
    /*package-private*/
    @ForceInline
    final
    $abstractvectortype$ ldLongOp(MemorySegment memory, long offset,
                                  VectorMask<$Boxtype$> m,
                                  FLdLongOp f) {
        final $type$[] out = new $type$[length()];
        final boolean[] selected = ((AbstractMask<$Boxtype$>)m).getBits();
        for (int lane = 0; lane < out.length; lane++) {
            if (!selected[lane]) {
                continue;
            }
            out[lane] = f.apply(memory, offset, lane);
        }
        return vectorFactory(out);
    }

    /**
     * Reads lane {@code i} from {@code ms}: the element located
     * {@code i} element-widths past byte offset {@code o}, using
     * {@code ELEMENT_LAYOUT}.
     */
    static $type$ memorySegmentGet(MemorySegment ms, long o, int i) {
        final long byteOffset = o + i * $sizeInBytes$L;
        return ms.get(ELEMENT_LAYOUT, byteOffset);
    }

    // Callback consumed by stOp: stores the lane value a for lane index i
    // into the memory object, relative to the int base offset.
    interface FStOp<M> {
        void apply(M memory, int offset, int i, $type$ a);
    }

    /**
     * Hands every lane of this vector, in lane order, to {@code f}
     * together with {@code memory} and {@code offset}.
     */
    /*package-private*/
    @ForceInline
    final
    <M> void stOp(M memory, int offset,
                  FStOp<M> f) {
        final $type$[] lanes = vec();
        for (int lane = 0; lane < lanes.length; lane++) {
            f.apply(memory, offset, lane, lanes[lane]);
        }
    }

    /**
     * Masked store template: only lanes selected by {@code m} are handed
     * to {@code f}; {@code f} is not invoked for unselected lanes.
     */
    /*package-private*/
    @ForceInline
    final
    <M> void stOp(M memory, int offset,
                  VectorMask<$Boxtype$> m,
                  FStOp<M> f) {
        final $type$[] lanes = vec();
        final boolean[] selected = ((AbstractMask<$Boxtype$>)m).getBits();
        for (int lane = 0; lane < lanes.length; lane++) {
            if (!selected[lane]) {
                continue;
            }
            f.apply(memory, offset, lane, lanes[lane]);
        }
    }

    // Callback consumed by stLongOp: stores the lane value a for lane index i
    // into the memory segment, relative to the long base offset.
    interface FStLongOp {
        void apply(MemorySegment memory, long offset, int i, $type$ a);
    }

    /**
     * Hands every lane of this vector, in lane order, to {@code f}
     * together with the memory segment and its {@code long} offset.
     */
    /*package-private*/
    @ForceInline
    final
    void stLongOp(MemorySegment memory, long offset,
                  FStLongOp f) {
        final $type$[] lanes = vec();
        for (int lane = 0; lane < lanes.length; lane++) {
            f.apply(memory, offset, lane, lanes[lane]);
        }
    }

    /**
     * Masked memory-segment store template: only lanes selected by
     * {@code m} are handed to {@code f}; {@code f} is not invoked for
     * unselected lanes.
     */
    /*package-private*/
    @ForceInline
    final
    void stLongOp(MemorySegment memory, long offset,
                  VectorMask<$Boxtype$> m,
                  FStLongOp f) {
        final $type$[] lanes = vec();
        final boolean[] selected = ((AbstractMask<$Boxtype$>)m).getBits();
        for (int lane = 0; lane < lanes.length; lane++) {
            if (!selected[lane]) {
                continue;
            }
            f.apply(memory, offset, lane, lanes[lane]);
        }
    }

    /**
     * Writes {@code e} as lane {@code i} of {@code ms}: the element located
     * {@code i} element-widths past byte offset {@code o}, using
     * {@code ELEMENT_LAYOUT}.
     */
    static void memorySegmentSet(MemorySegment ms, long o, int i, $type$ e) {
        final long byteOffset = o + i * $sizeInBytes$L;
        ms.set(ELEMENT_LAYOUT, byteOffset, e);
    }

    // Binary test

    // Callback consumed by bTest: given the condition code cond, the lane
    // index i and two lane values a and b, returns the boolean test result.
    /*package-private*/
    interface FBinTest {
        boolean apply(int cond, int i, $type$ a, $type$ b);
    }

    /**
     * Evaluates {@code f} on each pair of corresponding lanes of this
     * vector and {@code o}, passing {@code cond} through, and returns the
     * boolean results as a mask.
     */
    /*package-private*/
    @ForceInline
    final
    AbstractMask<$Boxtype$> bTest(int cond,
                                  Vector<$Boxtype$> o,
                                  FBinTest f) {
        final $type$[] lhs = vec();
        final $type$[] rhs = (($abstractvectortype$)o).vec();
        final boolean[] outcome = new boolean[length()];
        for (int lane = 0; lane < outcome.length; lane++) {
            outcome[lane] = f.apply(cond, lane, lhs[lane], rhs[lane]);
        }
        return maskFactory(outcome);
    }

#if[BITWISE]
    /**
     * Rotates the bits of {@code a} left by {@code n}.
     * The int/long variant delegates to the boxed type's {@code rotateLeft}.
     * The narrower variant masks {@code a} to its unsigned value, reduces the
     * distance to {@code n & (SIZE - 1)}, and ORs a left shift with the
     * complementary unsigned right shift before narrowing back to the lane type.
     */
    /*package-private*/
    @ForceInline
    static $type$ rotateLeft($type$ a, int n) {
#if[intOrLong]
        return $Boxtype$.rotateLeft(a, n);
#else[intOrLong]
        return ($type$)((((($type$)a) & $Boxtype$.toUnsignedInt(($type$)-1)) << (n & $Boxtype$.SIZE-1)) | (((($type$)a) & $Boxtype$.toUnsignedInt(($type$)-1)) >>> ($Boxtype$.SIZE - (n & $Boxtype$.SIZE-1))));
#end[intOrLong]
    }

    /**
     * Rotates the bits of {@code a} right by {@code n}.
     * The int/long variant delegates to the boxed type's {@code rotateRight}.
     * The narrower variant masks {@code a} to its unsigned value, reduces the
     * distance to {@code n & (SIZE - 1)}, and ORs an unsigned right shift with
     * the complementary left shift before narrowing back to the lane type.
     */
    /*package-private*/
    @ForceInline
    static $type$ rotateRight($type$ a, int n) {
#if[intOrLong]
        return $Boxtype$.rotateRight(a, n);
#else[intOrLong]
        return ($type$)((((($type$)a) & $Boxtype$.toUnsignedInt(($type$)-1)) >>> (n & $Boxtype$.SIZE-1)) | (((($type$)a) & $Boxtype$.toUnsignedInt(($type$)-1)) << ($Boxtype$.SIZE - (n & $Boxtype$.SIZE-1))));
#end[intOrLong]
    }
#end[BITWISE]

    // Returns this vector's species with the sharper $Type$Species static type;
    // used in this file for maskFactory and broadcast.
    /*package-private*/
    @Override
    abstract $Type$Species vspecies();

    /**
     * Converts a lane value to a {@code long}.  The floating-point variant
     * returns the raw bit pattern of {@code e}; the integral variant returns
     * {@code e} itself, widened to {@code long}.
     */
    /*package-private*/
    @ForceInline
    static long toBits($type$ e) {
        return {#if[FP]? $Type$.$type$ToRaw$Bitstype$Bits(e): e};
    }

    /**
     * Converts a {@code long} back to a lane value.  {@code bits} is first
     * narrowed to {@code $bitstype$}; the floating-point variant then
     * reinterprets that bit pattern as a {@code $type$}.
     */
    /*package-private*/
    @ForceInline
    static $type$ fromBits(long bits) {
        return {#if[FP]?$Type$.$bitstype$BitsTo$Type$}(($bitstype$)bits);
    }

    /**
     * Expands {@code v} under mask {@code m}: consecutive lanes of {@code v},
     * starting at lane 0, are written to the lanes of the result that are set
     * in {@code m}; lanes not set in {@code m} are zero.
     *
     * @param v the source vector
     * @param m the mask selecting the destination lanes
     * @return {@code v} itself when every lane of {@code m} is set,
     *         otherwise a new vector of the mask's species
     */
    static $abstractvectortype$ expandHelper(Vector<$Boxtype$> v, VectorMask<$Boxtype$> m) {
        $abstractvectortype$ vi = ($abstractvectortype$) v;
        if (m.allTrue()) {
            // Every lane is selected, so the expansion is the identity.
            // Check this first so no zero vector is created just to be discarded.
            return vi;
        }
        VectorSpecies<$Boxtype$> vsp = m.vectorSpecies();
        $abstractvectortype$ r  = ($abstractvectortype$) vsp.zero();
        // i walks the destination lanes, j the next unread source lane.
        for (int i = 0, j = 0; i < vsp.length(); i++) {
            if (m.laneIsSet(i)) {
                r = r.withLane(i, vi.lane(j++));
            }
        }
        return r;
    }

    /**
     * Compresses {@code v} under mask {@code m}: the lanes of {@code v} that
     * are set in {@code m} are packed, in order, into the lowest lanes of the
     * result; the remaining lanes are zero.
     *
     * @param v the source vector
     * @param m the mask selecting the source lanes
     * @return {@code v} itself when every lane of {@code m} is set,
     *         otherwise a new vector of the mask's species
     */
    static $abstractvectortype$ compressHelper(Vector<$Boxtype$> v, VectorMask<$Boxtype$> m) {
        $abstractvectortype$ vi = ($abstractvectortype$) v;
        if (m.allTrue()) {
            // Every lane is selected, so the compression is the identity.
            // Check this first so no zero vector is created just to be discarded.
            return vi;
        }
        VectorSpecies<$Boxtype$> vsp = m.vectorSpecies();
        $abstractvectortype$ r  = ($abstractvectortype$) vsp.zero();
        // i walks the source lanes, j the next free destination lane.
        for (int i = 0, j = 0; i < vsp.length(); i++) {
            if (m.laneIsSet(i)) {
                r = r.withLane(j++, vi.lane(i));
            }
        }
        return r;
    }

    /**
     * Selects lanes from the concatenation of {@code src1} and {@code src2}.
     * Each lane of {@code indexes} is converted to {@code int} and wrapped
     * into the range {@code [0, 2 * VLENGTH)}; wrapped values below
     * {@code VLENGTH} pick from {@code src1}, the others pick from
     * {@code src2} at {@code wrapped - VLENGTH}.
     */
    static $abstractvectortype$ selectFromTwoVectorHelper(Vector<$Boxtype$> indexes, Vector<$Boxtype$> src1, Vector<$Boxtype$> src2) {
        final int laneCount = indexes.length();
        final $type$[] out = new $type$[laneCount];
        final $type$[] selectors = (($abstractvectortype$)indexes).vec();
        final $type$[] lower = (($abstractvectortype$)src1).vec();
        final $type$[] upper = (($abstractvectortype$)src2).vec();
        for (int lane = 0; lane < laneCount; lane++) {
            final int wrapped = VectorIntrinsics.wrapToRange((int)selectors[lane], 2 * laneCount);
            if (wrapped >= laneCount) {
                out[lane] = upper[wrapped - laneCount];
            } else {
                out[lane] = lower[wrapped];
            }
        }
        return (($abstractvectortype$)src1).vectorFactory(out);
    }

    // Static factories (other than memory operations)

    // Note: A surprising behavior in javadoc
    // sometimes makes a lone /** {@inheritDoc} */
    // comment drop the method altogether,
    // apparently if the method mentions a
    // parameter or return type of Vector<$Boxtype$>
    // instead of Vector<E> as originally specified.
    // Adding an empty HTML fragment appears to
    // nudge javadoc into providing the desired
    // inherited documentation.  We use the HTML
    // comment <!--workaround--> for this.

    /**
     * Returns a vector of the given species
     * where all lane elements are set to
     * zero, the default primitive value.
     *
     * @param species species of the desired zero vector
     * @return a zero vector
     */
    @ForceInline
    public static $abstractvectortype$ zero(VectorSpecies<$Boxtype$> species) {
        $Type$Species vsp = ($Type$Species) species;
        // Both variants broadcast a zero bit pattern; the trailing lambda
        // builds the vector lane by lane from those bits via rvOp
        // (presumably the fallback used when the intrinsic is not applied).
#if[FP]
        // Floating-point variant: pass the raw bits of positive zero.
        return VectorSupport.fromBitsCoerced(vsp.vectorType(), $type$.class, species.length(),
                        toBits(0.0f), MODE_BROADCAST, vsp,
                        ((bits_, s_) -> s_.rvOp(i -> bits_)));
#else[FP]
        return VectorSupport.fromBitsCoerced(vsp.vectorType(), $type$.class, species.length(),
                                0, MODE_BROADCAST, vsp,
                                ((bits_, s_) -> s_.rvOp(i -> bits_)));
#end[FP]
    }

    /**
     * Returns a vector of the same species as this one
     * where all lane elements are set to
     * the primitive value {@code e}.
     *
     * <p> The contents of the current vector are discarded;
     * only the species is relevant to this operation.
     *
     * <p> This method returns the value of this expression:
     * {@code $abstractvectortype$.broadcast(this.species(), e)}.
     *
     * @apiNote
     * Unlike the similar method named {@code broadcast()}
     * in the supertype {@code Vector}, this method does not
     * need to validate its argument, and cannot throw
     * {@code IllegalArgumentException}.  This method is
     * therefore preferable to the supertype method.
     *
     * @param e the value to broadcast
     * @return a vector where all lane elements are set to
     *         the primitive value {@code e}
     * @see #broadcast(VectorSpecies,long)
     * @see Vector#broadcast(long)
     * @see VectorSpecies#broadcast(long)
     */
    public abstract $abstractvectortype$ broadcast($type$ e);

    /**
     * Returns a vector of the given species
     * where all lane elements are set to
     * the primitive value {@code e}.
     *
     * @param species species of the desired vector
     * @param e the value to broadcast
     * @return a vector where all lane elements are set to
     *         the primitive value {@code e}
     * @see #broadcast(long)
     * @see Vector#broadcast(long)
     * @see VectorSpecies#broadcast(long)
     */
    @ForceInline
    public static $abstractvectortype$ broadcast(VectorSpecies<$Boxtype$> species, $type$ e) {
        // Narrow to the concrete species type so the primitive overload is selected.
        return (($Type$Species) species).broadcast(e);
    }

    // Broadcasts e through this vector's own species.
    /*package-private*/
    @ForceInline
    final $abstractvectortype$ broadcastTemplate($type$ e) {
        return vspecies().broadcast(e);
    }

#if[!long]
    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote
     * When working with vector subtypes like {@code $abstractvectortype$},
     * {@linkplain #broadcast($type$) the more strongly typed method}
     * is typically selected.  It can be explicitly selected
     * using a cast: {@code v.broadcast(($type$)e)}.
     * The two expressions will produce numerically identical results.
     */
    @Override
    public abstract $abstractvectortype$ broadcast(long e);

    /**
     * Returns a vector of the given species
     * where all lane elements are set to
     * the primitive value {@code e}.
     *
     * The {@code long} value must be accurately representable
     * by the {@code ETYPE} of the vector species, so that
     * {@code e==(long)(ETYPE)e}.
     *
     * @param species species of the desired vector
     * @param e the value to broadcast
     * @return a vector where all lane elements are set to
     *         the primitive value {@code e}
     * @throws IllegalArgumentException
     *         if the given {@code long} value cannot
     *         be represented by the vector's {@code ETYPE}
     * @see #broadcast(VectorSpecies,$type$)
     * @see VectorSpecies#checkValue(long)
     */
    @ForceInline
    public static $abstractvectortype$ broadcast(VectorSpecies<$Boxtype$> species, long e) {
        // Narrow to the concrete species type and use its long overload.
        return (($Type$Species) species).broadcast(e);
    }

    // Broadcasts the long value e through this vector's own species.
    /*package-private*/
    @ForceInline
    final $abstractvectortype$ broadcastTemplate(long e) {
        final $Type$Species vsp = vspecies();
        return vsp.broadcast(e);
    }
#end[!long]

    // Unary lanewise support

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    $abstractvectortype$ lanewise(VectorOperators.Unary op);

    /**
     * Generic implementation of the unmasked unary lanewise operation.
     * Operators flagged {@code VO_SPECIAL} are rewritten in terms of other
     * vector operations where a branch below matches; every other operator
     * is resolved to an opcode and dispatched through
     * {@code VectorSupport.unaryOp}, with the per-lane implementation
     * looked up in {@code UN_IMPL}.
     */
    @ForceInline
    final
    $abstractvectortype$ lanewiseTemplate(VectorOperators.Unary op) {
        if (opKind(op, VO_SPECIAL)) {
            if (op == ZOMO) {
                // Lanes that compare != 0 are replaced by -1; zero lanes are kept.
                return blend(broadcast(-1), compare(NE, 0));
            }
#if[BITWISE]
            else if (op == NOT) {
                // Complement expressed as XOR with an all-ones vector.
                return broadcast(-1).lanewise(XOR, this);
            }
#end[BITWISE]
#if[FP]
            else if (opKind(op, VO_MATHLIB)) {
                return unaryMathOp(op);
            }
#end[FP]
        }
        // No mask: null is passed for both the mask class and the mask.
        int opc = opCode(op);
        return VectorSupport.unaryOp(
            opc, getClass(), null, $type$.class, length(),
            this, null,
            UN_IMPL.find(op, opc, $abstractvectortype$::unaryOperations));
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    $abstractvectortype$ lanewise(VectorOperators.Unary op,
                                  VectorMask<$Boxtype$> m);

    /**
     * Generic implementation of the masked unary lanewise operation.
     * The mask is first checked against {@code maskClass} and this vector.
     * Operators flagged {@code VO_SPECIAL} are rewritten in terms of other
     * masked vector operations where a branch below matches; every other
     * operator is resolved to an opcode and dispatched through
     * {@code VectorSupport.unaryOp} with the mask, with the per-lane
     * implementation looked up in {@code UN_IMPL}.
     */
    @ForceInline
    final
    $abstractvectortype$ lanewiseTemplate(VectorOperators.Unary op,
                                          Class<? extends VectorMask<$Boxtype$>> maskClass,
                                          VectorMask<$Boxtype$> m) {
        m.check(maskClass, this);
        if (opKind(op, VO_SPECIAL)) {
            if (op == ZOMO) {
                // Only lanes that are both selected by m and != 0 become -1.
                return blend(broadcast(-1), compare(NE, 0, m));
            }
#if[BITWISE]
            else if (op == NOT) {
                // Masked complement expressed as a masked XOR with all-ones.
                return lanewise(XOR, broadcast(-1), m);
            }
#end[BITWISE]
#if[FP]
            else if (opKind(op, VO_MATHLIB)) {
                // Compute the unmasked result, then keep it only in lanes set in m.
                return blend(unaryMathOp(op), m);
            }
#end[FP]
        }
        int opc = opCode(op);
        return VectorSupport.unaryOp(
            opc, getClass(), maskClass, $type$.class, length(),
            this, m,
            UN_IMPL.find(op, opc, $abstractvectortype$::unaryOperations));
    }

#if[FP]
    // Forwards a math-library unary operator to VectorMathLibrary.unaryMathOp,
    // passing the resolved opcode, this vector's species, the per-opcode
    // implementation factory (unaryOperations) and this vector as the operand.
    @ForceInline
    final
    $abstractvectortype$ unaryMathOp(VectorOperators.Unary op) {
        return VectorMathLibrary.unaryMathOp(op, opCode(op), species(), $abstractvectortype$::unaryOperations,
                                             this);
    }
#end[FP]

    // Lookup table for unary operator implementations; the lanewise templates
    // query it with UN_IMPL.find(op, opc, unaryOperations).
    // NOTE(review): presumably caches the lambdas produced by unaryOperations.
    private static final
    ImplCache<Unary, UnaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>>>
        UN_IMPL = new ImplCache<>(Unary.class, $Type$Vector.class);

    /**
     * Returns the per-lane Java implementation for the given unary opcode.
     * Each implementation applies a scalar function to every lane through
     * the masked {@code uOp}.  The set of supported opcodes depends on the
     * element type variant being generated.
     *
     * @param opc_ the {@code VECTOR_OP_*} opcode
     * @return the implementation, or {@code null} if the opcode is not
     *         handled here
     */
    private static UnaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>> unaryOperations(int opc_) {
        switch (opc_) {
            case VECTOR_OP_NEG: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) -a);
            case VECTOR_OP_ABS: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) Math.abs(a));
#if[!FP]
#if[intOrLong]
            // int/long: the boxed type provides the bit-counting helpers.
            case VECTOR_OP_BIT_COUNT: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) $Boxtype$.bitCount(a));
            case VECTOR_OP_TZ_COUNT: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) $Boxtype$.numberOfTrailingZeros(a));
            case VECTOR_OP_LZ_COUNT: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) $Boxtype$.numberOfLeadingZeros(a));
            case VECTOR_OP_REVERSE: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) $Boxtype$.reverse(a));
#else[intOrLong]
            // Narrower integral types: unqualified helpers that are not
            // defined in the portion of the file shown here.
            case VECTOR_OP_BIT_COUNT: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) bitCount(a));
            case VECTOR_OP_TZ_COUNT: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) numberOfTrailingZeros(a));
            case VECTOR_OP_LZ_COUNT: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) numberOfLeadingZeros(a));
            case VECTOR_OP_REVERSE: return (v0, m) ->
                    v0.uOp(m, (i, a) -> reverse(a));
#end[intOrLong]
#if[BITWISE]
#if[byte]
            // A single byte has no byte order to reverse: identity.
            case VECTOR_OP_REVERSE_BYTES: return (v0, m) ->
                    v0.uOp(m, (i, a) -> a);
#else[byte]
            case VECTOR_OP_REVERSE_BYTES: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) $Boxtype$.reverseBytes(a));
#end[byte]
#end[BITWISE]
#end[!FP]
#if[FP]
            // Floating-point: scalar java.lang.Math functions, narrowed
            // back to the lane type.
            case VECTOR_OP_SIN: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) Math.sin(a));
            case VECTOR_OP_COS: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) Math.cos(a));
            case VECTOR_OP_TAN: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) Math.tan(a));
            case VECTOR_OP_ASIN: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) Math.asin(a));
            case VECTOR_OP_ACOS: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) Math.acos(a));
            case VECTOR_OP_ATAN: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) Math.atan(a));
            case VECTOR_OP_EXP: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) Math.exp(a));
            case VECTOR_OP_LOG: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) Math.log(a));
            case VECTOR_OP_LOG10: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) Math.log10(a));
            case VECTOR_OP_SQRT: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) Math.sqrt(a));
            case VECTOR_OP_CBRT: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) Math.cbrt(a));
            case VECTOR_OP_SINH: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) Math.sinh(a));
            case VECTOR_OP_COSH: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) Math.cosh(a));
            case VECTOR_OP_TANH: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) Math.tanh(a));
            case VECTOR_OP_EXPM1: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) Math.expm1(a));
            case VECTOR_OP_LOG1P: return (v0, m) ->
                    v0.uOp(m, (i, a) -> ($type$) Math.log1p(a));
#end[FP]
            default: return null;
        }
    }

    // Binary lanewise support

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Binary,$type$)
     * @see #lanewise(VectorOperators.Binary,$type$,VectorMask)
     */
    @Override
    public abstract
    $abstractvectortype$ lanewise(VectorOperators.Binary op,
                                  Vector<$Boxtype$> v);

    /**
     * Generic implementation of the unmasked binary lanewise operation.
     * After checking that {@code v} is compatible with this vector,
     * special operators (and, for integral variants, shifts) are
     * pre-processed: some return early, others rewrite the operand or the
     * operator.  The resulting operator is then resolved to an opcode and
     * dispatched through {@code VectorSupport.binaryOp}, with the per-lane
     * implementation looked up in {@code BIN_IMPL}.
     */
    @ForceInline
    final
    $abstractvectortype$ lanewiseTemplate(VectorOperators.Binary op,
                                          Vector<$Boxtype$> v) {
        $abstractvectortype$ that = ($abstractvectortype$) v;
        that.check(this);

        if (opKind(op, VO_SPECIAL {#if[!FP]? | VO_SHIFT})) {
            if (op == FIRST_NONZERO) {
                // Take the lane from 'that' wherever this vector's lane has
                // an all-zero bit pattern; otherwise keep this vector's lane.
                VectorMask<$Boxbitstype$> mask
                    = this{#if[FP]?.viewAsIntegralLanes()}.compare(EQ, ($bitstype$) 0);
                return this.blend(that, mask{#if[FP]?.cast(vspecies())});
            }
#if[FP]
            else if (opKind(op, VO_MATHLIB)) {
                return binaryMathOp(op, that);
            }
#end[FP]
#if[BITWISE]
#if[!FP]
            if (opKind(op, VO_SHIFT)) {
                // As per shift specification for Java, mask the shift count.
                // This allows the JIT to ignore some ISA details.
                that = that.lanewise(AND, SHIFT_MASK);
            }
#end[!FP]
            if (op == AND_NOT) {
                // FIXME: Support this in the JIT.
                // Until then: a AND_NOT b is computed as a AND (NOT b).
                that = that.lanewise(NOT);
                op = AND;
            } else if (op == DIV) {
                // Integral division: fail up front if any divisor lane is zero.
                VectorMask<$Boxtype$> eqz = that.eq(($type$) 0);
                if (eqz.anyTrue()) {
                    throw that.divZeroException();
                }
            }
#end[BITWISE]
        }

        // No mask: null is passed for both the mask class and the mask.
        int opc = opCode(op);
        return VectorSupport.binaryOp(
            opc, getClass(), null, $type$.class, length(),
            this, that, null,
            BIN_IMPL.find(op, opc, $abstractvectortype$::binaryOperations));
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Binary,$type$,VectorMask)
     */
    @Override
    public abstract
    $abstractvectortype$ lanewise(VectorOperators.Binary op,
                                  Vector<$Boxtype$> v,
                                  VectorMask<$Boxtype$> m);

    /**
     * Generic implementation of the masked binary lanewise operation.
     * After checking {@code v} and {@code m} against this vector, special
     * operators (and, for integral variants, shifts) are pre-processed:
     * some return early, others rewrite the operand or the operator.
     * The resulting operator is then resolved to an opcode and dispatched
     * through {@code VectorSupport.binaryOp} with the mask, with the
     * per-lane implementation looked up in {@code BIN_IMPL}.
     */
    @ForceInline
    final
    $abstractvectortype$ lanewiseTemplate(VectorOperators.Binary op,
                                          Class<? extends VectorMask<$Boxtype$>> maskClass,
                                          Vector<$Boxtype$> v, VectorMask<$Boxtype$> m) {
        $abstractvectortype$ that = ($abstractvectortype$) v;
        that.check(this);
        m.check(maskClass, this);

        if (opKind(op, VO_SPECIAL {#if[!FP]? | VO_SHIFT})) {
            if (op == FIRST_NONZERO) {
                // Take the lane from 'that' only where the lane is selected
                // by m and this vector's lane has an all-zero bit pattern.
#if[FP]
                $Bitstype$Vector bits = this.viewAsIntegralLanes();
                VectorMask<$Boxbitstype$> mask
                    = bits.compare(EQ, ($bitstype$) 0, m.cast(bits.vspecies()));
                return this.blend(that, mask.cast(vspecies()));
#else[FP]
                VectorMask<$Boxtype$> mask
                    = this.compare(EQ, ($type$) 0, m);
                return this.blend(that, mask);
#end[FP]
            }
#if[FP]
            else if (opKind(op, VO_MATHLIB)) {
                // Compute the unmasked result, then keep it only in lanes set in m.
                return this.blend(binaryMathOp(op, that), m);
            }
#end[FP]

#if[BITWISE]
#if[!FP]
            if (opKind(op, VO_SHIFT)) {
                // As per shift specification for Java, mask the shift count.
                // This allows the JIT to ignore some ISA details.
                that = that.lanewise(AND, SHIFT_MASK);
            }
#end[!FP]
            if (op == AND_NOT) {
                // FIXME: Support this in the JIT.
                // Until then: a AND_NOT b is computed as a AND (NOT b).
                that = that.lanewise(NOT);
                op = AND;
            } else if (op == DIV) {
                // Only a zero divisor in a lane selected by m is an error.
                VectorMask<$Boxtype$> eqz = that.eq(($type$)0);
                if (eqz.and(m).anyTrue()) {
                    throw that.divZeroException();
                }
                // suppress div/0 exceptions in unset lanes
                // (NOT turns the remaining zero divisors into -1)
                that = that.lanewise(NOT, eqz);
            }
#end[BITWISE]
        }

        int opc = opCode(op);
        return VectorSupport.binaryOp(
            opc, getClass(), maskClass, $type$.class, length(),
            this, that, m,
            BIN_IMPL.find(op, opc, $abstractvectortype$::binaryOperations));
    }

#if[FP]
    // Hands a binary math-library operator to VectorMathLibrary, together
    // with its opcode, this vector's species and the binaryOperations factory.
    @ForceInline
    final
    $abstractvectortype$ binaryMathOp(VectorOperators.Binary op, $abstractvectortype$ that) {
        int opcode = opCode(op);
        return VectorMathLibrary.binaryMathOp(
            op, opcode, species(),
            $abstractvectortype$::binaryOperations,
            this, that);
    }
#end[FP]

    // Per-operator store of BinaryOperation implementations; it is consulted
    // through BIN_IMPL.find(op, opc, binaryOperations) by the lanewise templates.
    private static final ImplCache<Binary, BinaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>>>
        BIN_IMPL = new ImplCache<>(Binary.class, $Type$Vector.class);

    // Maps a VECTOR_OP_* opcode to a lambda that applies the matching scalar
    // operation lane by lane through bOp.  Opcodes without an entry yield null.
    private static BinaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>> binaryOperations(int opc_) {
        switch (opc_) {
            case VECTOR_OP_ADD:
                return (lhs, rhs, m) -> lhs.bOp(rhs, m, (lane, x, y) -> ($type$)(x + y));
            case VECTOR_OP_SUB:
                return (lhs, rhs, m) -> lhs.bOp(rhs, m, (lane, x, y) -> ($type$)(x - y));
            case VECTOR_OP_MUL:
                return (lhs, rhs, m) -> lhs.bOp(rhs, m, (lane, x, y) -> ($type$)(x * y));
            case VECTOR_OP_DIV:
                return (lhs, rhs, m) -> lhs.bOp(rhs, m, (lane, x, y) -> ($type$)(x / y));
            case VECTOR_OP_MAX:
                return (lhs, rhs, m) -> lhs.bOp(rhs, m, (lane, x, y) -> ($type$)Math.max(x, y));
            case VECTOR_OP_MIN:
                return (lhs, rhs, m) -> lhs.bOp(rhs, m, (lane, x, y) -> ($type$)Math.min(x, y));
#if[BITWISE]
            // Bitwise logic
            case VECTOR_OP_AND:
                return (lhs, rhs, m) -> lhs.bOp(rhs, m, (lane, x, y) -> ($type$)(x & y));
            case VECTOR_OP_OR:
                return (lhs, rhs, m) -> lhs.bOp(rhs, m, (lane, x, y) -> ($type$)(x | y));
            case VECTOR_OP_XOR:
                return (lhs, rhs, m) -> lhs.bOp(rhs, m, (lane, x, y) -> ($type$)(x ^ y));
            // Shifts and rotates; the second operand is the per-lane count
            case VECTOR_OP_LSHIFT:
                return (lhs, rhs, m) -> lhs.bOp(rhs, m, (lane, x, count) -> ($type$)(x << count));
            case VECTOR_OP_RSHIFT:
                return (lhs, rhs, m) -> lhs.bOp(rhs, m, (lane, x, count) -> ($type$)(x >> count));
            case VECTOR_OP_URSHIFT:
                return (lhs, rhs, m) ->
                        lhs.bOp(rhs, m, (lane, x, count) -> ($type$)((x & LSHR_SETUP_MASK) >>> count));
            case VECTOR_OP_LROTATE:
                return (lhs, rhs, m) -> lhs.bOp(rhs, m, (lane, x, count) -> rotateLeft(x, (int)count));
            case VECTOR_OP_RROTATE:
                return (lhs, rhs, m) -> lhs.bOp(rhs, m, (lane, x, count) -> rotateRight(x, (int)count));
            // Unsigned min/max
            case VECTOR_OP_UMAX:
                return (lhs, rhs, m) ->
                        lhs.bOp(rhs, m, (lane, x, y) -> ($type$)VectorMath.maxUnsigned(x, y));
            case VECTOR_OP_UMIN:
                return (lhs, rhs, m) ->
                        lhs.bOp(rhs, m, (lane, x, y) -> ($type$)VectorMath.minUnsigned(x, y));
            // Saturating arithmetic, signed then unsigned
            case VECTOR_OP_SADD:
                return (lhs, rhs, m) ->
                        lhs.bOp(rhs, m, (lane, x, y) -> ($type$)(VectorMath.addSaturating(x, y)));
            case VECTOR_OP_SSUB:
                return (lhs, rhs, m) ->
                        lhs.bOp(rhs, m, (lane, x, y) -> ($type$)(VectorMath.subSaturating(x, y)));
            case VECTOR_OP_SUADD:
                return (lhs, rhs, m) ->
                        lhs.bOp(rhs, m, (lane, x, y) -> ($type$)(VectorMath.addSaturatingUnsigned(x, y)));
            case VECTOR_OP_SUSUB:
                return (lhs, rhs, m) ->
                        lhs.bOp(rhs, m, (lane, x, y) -> ($type$)(VectorMath.subSaturatingUnsigned(x, y)));
#if[intOrLong]
            case VECTOR_OP_COMPRESS_BITS:
                return (lhs, rhs, m) -> lhs.bOp(rhs, m, (lane, x, bits) -> $Boxtype$.compress(x, bits));
            case VECTOR_OP_EXPAND_BITS:
                return (lhs, rhs, m) -> lhs.bOp(rhs, m, (lane, x, bits) -> $Boxtype$.expand(x, bits));
#end[intOrLong]
#end[BITWISE]
#if[FP]
            // OR on floating-point lanes works on the raw bits
            case VECTOR_OP_OR:
                return (lhs, rhs, m) ->
                        lhs.bOp(rhs, m, (lane, x, y) -> fromBits(toBits(x) | toBits(y)));
            case VECTOR_OP_ATAN2:
                return (lhs, rhs, m) -> lhs.bOp(rhs, m, (lane, x, y) -> ($type$) Math.atan2(x, y));
            case VECTOR_OP_POW:
                return (lhs, rhs, m) -> lhs.bOp(rhs, m, (lane, x, y) -> ($type$) Math.pow(x, y));
            case VECTOR_OP_HYPOT:
                return (lhs, rhs, m) -> lhs.bOp(rhs, m, (lane, x, y) -> ($type$) Math.hypot(x, y));
#end[FP]
            default:
                return null;
        }
    }

    // FIXME: Maybe all of the public final methods in this file (the
    // simple ones that just call lanewise) should be pushed down to
    // the X-VectorBits template.  They can't optimize properly at
    // this level, and must rely on inlining.  Does it work?
    // (If it works, of course keep the code here.)

    /**
     * Applies a binary operation lane-wise to this vector
     * and the broadcast of a scalar.
     *
     * Every lane of this vector is combined with the same
     * scalar value by the selected operation.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, this.broadcast(e))}.
     *
     * @param op the operation used to process lane values
     * @param e the input scalar
     * @return the result of applying the operation lane-wise
     *         to the input vector and the scalar
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,$type$,VectorMask)
     */
    @ForceInline
    public final
    $abstractvectortype$ lanewise(VectorOperators.Binary op,
                                  $type$ e) {
#if[BITWISE]
        // A shift whose count survives the round trip through int goes
        // to the scalar-count shift path instead of being broadcast.
        if (opKind(op, VO_SHIFT) && ($type$)(int)e == e) {
            return lanewiseShift(op, (int) e);
        }
        // a AND_NOT e equals a AND (NOT e): complement the scalar once.
        if (op == AND_NOT) {
            return lanewise(AND, broadcast(($type$) ~e));
        }
#end[BITWISE]
        return lanewise(op, broadcast(e));
    }

    /**
     * Combines the lane values of this vector
     * with the value of a broadcast scalar,
     * with selection of lane elements controlled by a mask.
     *
     * This is a masked lane-wise binary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, this.broadcast(e), m)}.
     *
     * @param op the operation used to process lane values
     * @param e the input scalar
     * @param m the mask controlling lane selection
     * @return the result of applying the operation lane-wise
     *         to the input vector and the scalar
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
     * @see #lanewise(VectorOperators.Binary,$type$)
     */
    @ForceInline
    public final
    $abstractvectortype$ lanewise(VectorOperators.Binary op,
                                  $type$ e,
                                  VectorMask<$Boxtype$> m) {
#if[BITWISE]
        // A shift whose count survives the round trip through int goes
        // to the masked scalar-count shift path instead of being broadcast.
        if (opKind(op, VO_SHIFT) && ($type$)(int)e == e) {
            return lanewiseShift(op, (int) e, m);
        }
        // a AND_NOT e equals a AND (NOT e): complement the scalar once.
        if (op == AND_NOT) {
            return lanewise(AND, broadcast(($type$) ~e), m);
        }
#end[BITWISE]
        return lanewise(op, broadcast(e), m);
    }

#if[!long]
    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote
     * When working with vector subtypes like {@code $abstractvectortype$},
     * {@linkplain #lanewise(VectorOperators.Binary,$type$)
     * the more strongly typed method}
     * is typically selected.  It can be explicitly selected
     * using a cast: {@code v.lanewise(op,($type$)e)}.
     * The two expressions will produce numerically identical results.
     */
    @ForceInline
    public final
    $abstractvectortype$ lanewise(VectorOperators.Binary op,
                                  long e) {
        // Narrow first; the check below decides whether narrowing was legal.
        $type$ e1 = ($type$) e;
#if[BITWISE]
        // The shift exemption must look at the original value e.  Testing
        // the already narrowed e1 against e can never succeed once
        // (long)e1 != e holds, which would make the exemption dead code.
        if ((long)e1 != e
            // allow shift ops to clip down their int parameters
            && !(opKind(op, VO_SHIFT) && (int)e == e)) {
#else[BITWISE]
        if ((long)e1 != e) {
#end[BITWISE]
            vspecies().checkValue(e);  // for exception
        }
        return lanewise(op, e1);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote
     * When working with vector subtypes like {@code $abstractvectortype$},
     * {@linkplain #lanewise(VectorOperators.Binary,$type$,VectorMask)
     * the more strongly typed method}
     * is typically selected.  It can be explicitly selected
     * using a cast: {@code v.lanewise(op,($type$)e,m)}.
     * The two expressions will produce numerically identical results.
     */
    @ForceInline
    public final
    $abstractvectortype$ lanewise(VectorOperators.Binary op,
                                  long e, VectorMask<$Boxtype$> m) {
        // Narrow first; the check below decides whether narrowing was legal.
        $type$ e1 = ($type$) e;
#if[BITWISE]
        // The shift exemption must look at the original value e.  Testing
        // the already narrowed e1 against e can never succeed once
        // (long)e1 != e holds, which would make the exemption dead code.
        if ((long)e1 != e
            // allow shift ops to clip down their int parameters
            && !(opKind(op, VO_SHIFT) && (int)e == e)) {
#else[BITWISE]
        if ((long)e1 != e) {
#end[BITWISE]
            vspecies().checkValue(e);  // for exception
        }
        return lanewise(op, e1, m);
    }
#end[!long]

#if[BITWISE]
    /*package-private*/
    abstract $abstractvectortype$
    lanewiseShift(VectorOperators.Binary op, int e);

    /*package-private*/
    // Shared body for the unmasked scalar-count shift declared above:
    // the count is reduced with SHIFT_MASK and passed to broadcastInt.
    @ForceInline
    final $abstractvectortype$
    lanewiseShiftTemplate(VectorOperators.Binary op, int e) {
        // Special handling for these.  FIXME: Refactor?
        assert(opKind(op, VO_SHIFT));
        // As per shift specification for Java, mask the shift count.
        int count = e & SHIFT_MASK;
        int opcode = opCode(op);
        return VectorSupport.broadcastInt(
            opcode, getClass(), null, $type$.class, length(),
            this, count, null,
            BIN_INT_IMPL.find(op, opcode, $abstractvectortype$::broadcastIntOperations));
    }

    /*package-private*/
    abstract $abstractvectortype$
    lanewiseShift(VectorOperators.Binary op, int e, VectorMask<$Boxtype$> m);

    /*package-private*/
    // Shared body for the masked scalar-count shift declared above:
    // the mask is checked, the count is reduced with SHIFT_MASK, and
    // both are passed to broadcastInt along with the mask class.
    @ForceInline
    final $abstractvectortype$
    lanewiseShiftTemplate(VectorOperators.Binary op,
                          Class<? extends VectorMask<$Boxtype$>> maskClass,
                          int e, VectorMask<$Boxtype$> m) {
        m.check(maskClass, this);
        assert(opKind(op, VO_SHIFT));
        // As per shift specification for Java, mask the shift count.
        int count = e & SHIFT_MASK;
        int opcode = opCode(op);
        return VectorSupport.broadcastInt(
            opcode, getClass(), maskClass, $type$.class, length(),
            this, count, m,
            BIN_INT_IMPL.find(op, opcode, $abstractvectortype$::broadcastIntOperations));
    }

    // Per-operator store of scalar-count shift implementations; it is consulted
    // through BIN_INT_IMPL.find(op, opc, broadcastIntOperations).
    private static final
    ImplCache<Binary, VectorBroadcastIntOp<$abstractvectortype$, VectorMask<$Boxtype$>>>
        BIN_INT_IMPL = new ImplCache<>(Binary.class, $Type$Vector.class);

    // Maps a shift or rotate opcode to a lambda that applies the operation to
    // every lane with one shared int count.  Other opcodes yield null.
    private static VectorBroadcastIntOp<$abstractvectortype$, VectorMask<$Boxtype$>> broadcastIntOperations(int opc_) {
        switch (opc_) {
            case VECTOR_OP_LSHIFT:
                return (vec, count, mask) -> vec.uOp(mask, (lane, x) -> ($type$)(x << count));
            case VECTOR_OP_RSHIFT:
                return (vec, count, mask) -> vec.uOp(mask, (lane, x) -> ($type$)(x >> count));
            case VECTOR_OP_URSHIFT:
                return (vec, count, mask) ->
                        vec.uOp(mask, (lane, x) -> ($type$)((x & LSHR_SETUP_MASK) >>> count));
            case VECTOR_OP_LROTATE:
                return (vec, count, mask) -> vec.uOp(mask, (lane, x) -> rotateLeft(x, (int)count));
            case VECTOR_OP_RROTATE:
                return (vec, count, mask) -> vec.uOp(mask, (lane, x) -> rotateRight(x, (int)count));
            default:
                return null;
        }
    }

    // As per shift specification for Java, mask the shift count.
    // We mask 0x3F (long), 0x1F (int), 0x0F (short), 0x07 (byte),
    // i.e. the lane size in bits minus one.
    // The latter two maskings go beyond the JLS, but seem reasonable
    // since our lane types are first-class types, not just dressed
    // up ints.
    private static final int SHIFT_MASK = ($Boxtype$.SIZE - 1);
#if[byteOrShort]
    // Also simulate >>> on sub-word variables with a mask.
    // A byte or short lane is promoted to int (with sign extension)
    // before >>>; ANDing with this mask first clears the bits above
    // the lane width so that zeros are shifted in.
    private static final int LSHR_SETUP_MASK = ((1 << $Boxtype$.SIZE) - 1);
#else[byteOrShort]
    // All ones: the AND in the URSHIFT fallbacks leaves the lane unchanged.
    private static final $type$ LSHR_SETUP_MASK = -1;
#end[byteOrShort]
#end[BITWISE]

    // Ternary lanewise support

    // Ternary operators come in eight variations:
    //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2])
    //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask)

    // It is annoying to support all of these variations of masking
    // and broadcast, but it would be more surprising not to continue
    // the obvious pattern started by unary and binary.

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,Vector,$type$,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,$type$,Vector,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,$type$,$type$)
     * @see #lanewise(VectorOperators.Ternary,Vector,$type$)
     * @see #lanewise(VectorOperators.Ternary,$type$,Vector)
     */
    @Override
    public abstract
    $abstractvectortype$ lanewise(VectorOperators.Ternary op,
                                  Vector<$Boxtype$> v1,
                                  Vector<$Boxtype$> v2);
    // Shared body for the unmasked ternary lanewise operation declared above.
    @ForceInline
    final
    $abstractvectortype$ lanewiseTemplate(VectorOperators.Ternary op,
                                          Vector<$Boxtype$> v1,
                                          Vector<$Boxtype$> v2) {
        $abstractvectortype$ that = ($abstractvectortype$) v1;
        $abstractvectortype$ tother = ($abstractvectortype$) v2;
        // It's a word: https://www.dictionary.com/browse/tother
        // See also Chapter 11 of Dickens, Our Mutual Friend:
        // "Totherest Governor," replied Mr Riderhood...
        that.check(this);
        tother.check(this);
#if[BITWISE]
        if (op == BITWISE_BLEND) {
            // FIXME: Support this in the JIT.
            // Until then, build it from two XORs and an AND:
            //   a ^ ((a ^ b) & c)
            $abstractvectortype$ differing = this.lanewise(XOR, that);
            $abstractvectortype$ selected = differing.lanewise(AND, tother);
            return this.lanewise(XOR, selected);
        }
#end[BITWISE]
        int opcode = opCode(op);
        return VectorSupport.ternaryOp(
            opcode, getClass(), null, $type$.class, length(),
            this, that, tother, null,
            TERN_IMPL.find(op, opcode, $abstractvectortype$::ternaryOperations));
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,Vector,$type$,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,$type$,Vector,VectorMask)
     */
    @Override
    public abstract
    $abstractvectortype$ lanewise(VectorOperators.Ternary op,
                                  Vector<$Boxtype$> v1,
                                  Vector<$Boxtype$> v2,
                                  VectorMask<$Boxtype$> m);
    // Shared body for the masked ternary lanewise operation declared above.
    // The mask class is used to check m and is forwarded to ternaryOp.
    @ForceInline
    final
    $abstractvectortype$ lanewiseTemplate(VectorOperators.Ternary op,
                                          Class<? extends VectorMask<$Boxtype$>> maskClass,
                                          Vector<$Boxtype$> v1,
                                          Vector<$Boxtype$> v2,
                                          VectorMask<$Boxtype$> m) {
        $abstractvectortype$ that = ($abstractvectortype$) v1;
        $abstractvectortype$ tother = ($abstractvectortype$) v2;
        // It's a word: https://www.dictionary.com/browse/tother
        // See also Chapter 11 of Dickens, Our Mutual Friend:
        // "Totherest Governor," replied Mr Riderhood...
        that.check(this);
        tother.check(this);
        m.check(maskClass, this);

#if[BITWISE]
        if (op == BITWISE_BLEND) {
            // FIXME: Support this in the JIT.
            // Until then, build it from two XORs and an AND:
            //   a ^ ((a ^ b) & c)
            // Only the final XOR is performed under the mask.
            $abstractvectortype$ differing = this.lanewise(XOR, that);
            $abstractvectortype$ selected = differing.lanewise(AND, tother);
            return this.lanewise(XOR, selected, m);
        }
#end[BITWISE]
        int opcode = opCode(op);
        return VectorSupport.ternaryOp(
            opcode, getClass(), maskClass, $type$.class, length(),
            this, that, tother, m,
            TERN_IMPL.find(op, opcode, $abstractvectortype$::ternaryOperations));
    }

    // Per-operator store of TernaryOperation implementations; it is consulted
    // through TERN_IMPL.find(op, opc, ternaryOperations).
    private static final ImplCache<Ternary, TernaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>>>
        TERN_IMPL = new ImplCache<>(Ternary.class, $Type$Vector.class);

    // Maps a ternary opcode to a lambda that applies the scalar operation lane
    // by lane through tOp.  Only FMA (floating-point lanes) has an entry here;
    // every other opcode yields null.
    private static TernaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>> ternaryOperations(int opc_) {
        switch (opc_) {
#if[FP]
            case VECTOR_OP_FMA:
                return (first, second, third, mask) ->
                        first.tOp(second, third, mask, (lane, x, y, z) -> Math.fma(x, y, z));
#end[FP]
            default:
                return null;
        }
    }

    /**
     * Combines the lane values of this vector
     * with the values of two broadcast scalars.
     *
     * This is a lane-wise ternary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}.
     *
     * @param op the operation used to combine lane values
     * @param e1 the first input scalar
     * @param e2 the second input scalar
     * @return the result of applying the operation lane-wise
     *         to the input vector and the scalars
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
     * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask)
     */
    @ForceInline
    public final
    $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,e1,e2)
                                  $type$ e1,
                                  $type$ e2) {
        // Broadcast both scalars and defer to the vector-vector form.
        return lanewise(op, broadcast(e1), broadcast(e2));
    }

    /**
     * Combines the lane values of this vector
     * with the values of two broadcast scalars,
     * with selection of lane elements controlled by a mask.
     *
     * This is a masked lane-wise ternary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}.
     *
     * @param op the operation used to combine lane values
     * @param e1 the first input scalar
     * @param e2 the second input scalar
     * @param m the mask controlling lane selection
     * @return the result of applying the operation lane-wise
     *         to the input vector and the scalars
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,$type$,$type$)
     */
    @ForceInline
    public final
    $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
                                  $type$ e1,
                                  $type$ e2,
                                  VectorMask<$Boxtype$> m) {
        // Broadcast both scalars and defer to the masked vector-vector form.
        return lanewise(op, broadcast(e1), broadcast(e2), m);
    }

    /**
     * Combines the lane values of this vector
     * with the values of another vector and a broadcast scalar.
     *
     * This is a lane-wise ternary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, v1, this.broadcast(e2))}.
     *
     * @param op the operation used to combine lane values
     * @param v1 the other input vector
     * @param e2 the input scalar
     * @return the result of applying the operation lane-wise
     *         to the input vectors and the scalar
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Ternary,$type$,$type$)
     * @see #lanewise(VectorOperators.Ternary,Vector,$type$,VectorMask)
     */
    @ForceInline
    public final
    $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,v1,e2)
                                  Vector<$Boxtype$> v1,
                                  $type$ e2) {
        // Only the trailing scalar needs broadcasting; v1 is passed as is.
        return lanewise(op, v1, broadcast(e2));
    }

    /**
     * Combines the lane values of this vector
     * with the values of another vector and a broadcast scalar,
     * with selection of lane elements controlled by a mask.
     *
     * This is a masked lane-wise ternary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, v1, this.broadcast(e2), m)}.
     *
     * @param op the operation used to combine lane values
     * @param v1 the other input vector
     * @param e2 the input scalar
     * @param m the mask controlling lane selection
     * @return the result of applying the operation lane-wise
     *         to the input vectors and the scalar
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,Vector,$type$)
     */
    @ForceInline
    public final
    $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
                                  Vector<$Boxtype$> v1,
                                  $type$ e2,
                                  VectorMask<$Boxtype$> m) {
        // Only the trailing scalar needs broadcasting; v1 and m pass through.
        return lanewise(op, v1, broadcast(e2), m);
    }

    /**
     * Combines the lane values of this vector
     * with the values of another vector and a broadcast scalar.
     *
     * This is a lane-wise ternary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, this.broadcast(e1), v2)}.
     *
     * @param op the operation used to combine lane values
     * @param e1 the input scalar
     * @param v2 the other input vector
     * @return the result of applying the operation lane-wise
     *         to the input vectors and the scalar
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
     * @see #lanewise(VectorOperators.Ternary,$type$,Vector,VectorMask)
     */
    @ForceInline
    public final
    $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,e1,v2)
                                  $type$ e1,
                                  Vector<$Boxtype$> v2) {
        // Only the leading scalar needs broadcasting; v2 is passed as is.
        return lanewise(op, broadcast(e1), v2);
    }

    /**
     * Combines the lane values of this vector
     * with the values of another vector and a broadcast scalar,
     * with selection of lane elements controlled by a mask.
     *
     * This is a masked lane-wise ternary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, this.broadcast(e1), v2, m)}.
     *
     * @param op the operation used to combine lane values
     * @param e1 the input scalar
     * @param v2 the other input vector
     * @param m the mask controlling lane selection
     * @return the result of applying the operation lane-wise
     *         to the input vectors and the scalar
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,$type$,Vector)
     */
    @ForceInline
    public final
    $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
                                  $type$ e1,
                                  Vector<$Boxtype$> v2,
                                  VectorMask<$Boxtype$> m) {
        // Only the leading scalar needs broadcasting; v2 and m pass through.
        return lanewise(op, broadcast(e1), v2, m);
    }

    // (Thus endeth the Great and Mighty Ternary Ogdoad.)
    // https://en.wikipedia.org/wiki/Ogdoad

    /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV
    //
    // These include masked and non-masked versions.
    // This subclass adds broadcast (masked or not).

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #add($type$)
     */
    @Override
    @ForceInline
    public final $abstractvectortype$ add(Vector<$Boxtype$> v) {
        // Convenience form of the vector-vector lanewise call with ADD.
        return lanewise(ADD, v);
    }

    /**
     * Adds this vector to the broadcast of an input scalar.
     *
     * This is a lane-wise binary operation which applies
     * the primitive addition operation ({@code +}) to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,$type$)
     *    lanewise}{@code (}{@link VectorOperators#ADD
     *    ADD}{@code , e)}.
     *
     * @param e the input scalar
     * @return the result of adding each lane of this vector to the scalar
     * @see #add(Vector)
     * @see #broadcast($type$)
     * @see #add($type$,VectorMask)
     * @see VectorOperators#ADD
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,$type$)
     */
    @ForceInline
    public final
    $abstractvectortype$ add($type$ e) {
        // The scalar overload of lanewise takes care of broadcasting e.
        return lanewise(ADD, e);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #add($type$,VectorMask)
     */
    @Override
    @ForceInline
    public final $abstractvectortype$ add(Vector<$Boxtype$> v,
                                          VectorMask<$Boxtype$> m) {
        // Convenience form of the masked vector-vector lanewise call with ADD.
        return lanewise(ADD, v, m);
    }

    /**
     * Adds this vector to the broadcast of an input scalar,
     * selecting lane elements controlled by a mask.
     *
     * This is a masked lane-wise binary operation which applies
     * the primitive addition operation ({@code +}) to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,$type$,VectorMask)
     *    lanewise}{@code (}{@link VectorOperators#ADD
     *    ADD}{@code , e, m)}.
     *
     * @param e the input scalar
     * @param m the mask controlling lane selection
     * @return the result of adding each lane of this vector to the scalar
     * @see #add(Vector,VectorMask)
     * @see #broadcast($type$)
     * @see #add($type$)
     * @see VectorOperators#ADD
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,$type$)
     */
    @ForceInline
    public final $abstractvectortype$ add($type$ e,
                                          VectorMask<$Boxtype$> m) {
        // The masked scalar overload of lanewise takes care of broadcasting e.
        return lanewise(ADD, e, m);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #sub($type$)
     */
    @Override
    @ForceInline
    public final $abstractvectortype$ sub(Vector<$Boxtype$> v) {
        // Convenience form of the vector-vector lanewise call with SUB.
        return lanewise(SUB, v);
    }

    /**
     * Subtracts an input scalar from this vector.
     *
     * This is a lane-wise binary operation which applies
     * the primitive subtraction operation ({@code -}) to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,$type$)
     *    lanewise}{@code (}{@link VectorOperators#SUB
     *    SUB}{@code , e)}.
     *
     * @param e the input scalar
     * @return the result of subtracting the scalar from each lane of this vector
     * @see #sub(Vector)
     * @see #broadcast($type$)
     * @see #sub($type$,VectorMask)
     * @see VectorOperators#SUB
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,$type$)
     */
    @ForceInline
    public final $abstractvectortype$ sub($type$ e) {
        // The scalar overload of lanewise takes care of broadcasting e.
        return lanewise(SUB, e);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #sub($type$,VectorMask)
     */
    @Override
    @ForceInline
    public final $abstractvectortype$ sub(Vector<$Boxtype$> v,
                                          VectorMask<$Boxtype$> m) {
        // Convenience form of the masked vector-vector lanewise call with SUB.
        return lanewise(SUB, v, m);
    }

    /**
     * Subtracts an input scalar from this vector
     * under the control of a mask.
     *
     * This is a masked lane-wise binary operation which applies
     * the primitive subtraction operation ({@code -}) to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,$type$,VectorMask)
     *    lanewise}{@code (}{@link VectorOperators#SUB
     *    SUB}{@code , e, m)}.
     *
     * @param e the input scalar
     * @param m the mask controlling lane selection
     * @return the result of subtracting the scalar from each lane of this vector
     * @see #sub(Vector,VectorMask)
     * @see #broadcast($type$)
     * @see #sub($type$)
     * @see VectorOperators#SUB
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,$type$)
     */
    @ForceInline
    public final $abstractvectortype$ sub($type$ e,
                                          VectorMask<$Boxtype$> m) {
        // The masked scalar overload of lanewise takes care of broadcasting e.
        return lanewise(SUB, e, m);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #mul($type$)
     */
    @Override
    @ForceInline
    public final $abstractvectortype$ mul(Vector<$Boxtype$> v) {
        // Convenience form of the vector-vector lanewise call with MUL.
        return lanewise(MUL, v);
    }

    /**
     * Multiplies this vector by the broadcast of an input scalar.
     *
     * This is a lane-wise binary operation which applies
     * the primitive multiplication operation ({@code *}) to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,$type$)
     *    lanewise}{@code (}{@link VectorOperators#MUL
     *    MUL}{@code , e)}.
     *
     * @param e the input scalar
     * @return the result of multiplying this vector by the given scalar
     * @see #mul(Vector)
     * @see #broadcast($type$)
     * @see #mul($type$,VectorMask)
     * @see VectorOperators#MUL
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,$type$)
     */
    @ForceInline
    public final $abstractvectortype$ mul($type$ e) {
        // The scalar overload of lanewise takes care of broadcasting e.
        return lanewise(MUL, e);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #mul($type$,VectorMask)
     */
    @Override
    @ForceInline
    public final $abstractvectortype$ mul(Vector<$Boxtype$> v,
                                          VectorMask<$Boxtype$> m) {
        // Convenience form of the masked vector-vector lanewise call with MUL.
        return lanewise(MUL, v, m);
    }

    /**
     * Multiplies this vector by the broadcast of an input scalar,
     * selecting lane elements controlled by a mask.
     *
     * This is a masked lane-wise binary operation which applies
     * the primitive multiplication operation ({@code *}) to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,$type$,VectorMask)
     *    lanewise}{@code (}{@link VectorOperators#MUL
     *    MUL}{@code , e, m)}.
     *
     * @param e the input scalar
     * @param m the mask controlling lane selection
     * @return the result of multiplying each lane of this vector by the scalar
     * @see #mul(Vector,VectorMask)
     * @see #broadcast($type$)
     * @see #mul($type$)
     * @see VectorOperators#MUL
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,$type$)
     */
    @ForceInline
    public final $abstractvectortype$ mul($type$ e,
                                          VectorMask<$Boxtype$> m) {
        // The masked scalar overload of lanewise takes care of broadcasting e.
        return lanewise(MUL, e, m);
    }

    /**
     * {@inheritDoc} <!--workaround-->
#if[FP]
     * @apiNote Because the underlying scalar operation is IEEE
     * floating point division, division by zero in fact will
     * not throw an exception, but will yield a signed
     * infinity or NaN.
#else[FP]
     * @apiNote If there is a zero divisor, {@code
     * ArithmeticException} will be thrown.
#end[FP]
     */
    @Override
    @ForceInline
    public final
    $abstractvectortype$ div(Vector<$Boxtype$> v) {
        // Vector divide is the DIV operator applied lane-wise against v.
        final $abstractvectortype$ quotient = lanewise(DIV, v);
        return quotient;
    }

    /**
     * Divides this vector by the broadcast of an input scalar.
     *
     * This is a lane-wise binary operation which applies
     * the primitive division operation ({@code /}) to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,$type$)
     *    lanewise}{@code (}{@link VectorOperators#DIV
     *    DIV}{@code , e)}.
     *
#if[FP]
     * @apiNote Because the underlying scalar operation is IEEE
     * floating point division, division by zero in fact will
     * not throw an exception, but will yield a signed
     * infinity or NaN.
#else[FP]
     * @apiNote If there is a zero divisor, {@code
     * ArithmeticException} will be thrown.
#end[FP]
     *
     * @param e the input scalar
     * @return the result of dividing each lane of this vector by the scalar
     * @see #div(Vector)
     * @see #broadcast($type$)
     * @see #div($type$,VectorMask)
     * @see VectorOperators#DIV
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,$type$)
     */
    @ForceInline
    public final
    $abstractvectortype$ div($type$ e) {
        // Scalar divide is the DIV operator applied lane-wise against e.
        final $abstractvectortype$ quotient = lanewise(DIV, e);
        return quotient;
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #div($type$,VectorMask)
#if[FP]
     * @apiNote Because the underlying scalar operation is IEEE
     * floating point division, division by zero in fact will
     * not throw an exception, but will yield a signed
     * infinity or NaN.
#else[FP]
     * @apiNote If there is a zero divisor, {@code
     * ArithmeticException} will be thrown.
#end[FP]
     */
    @Override
    @ForceInline
    public final
    $abstractvectortype$ div(Vector<$Boxtype$> v, VectorMask<$Boxtype$> m) {
        // Masked vector divide: DIV applied lane-wise with mask m.
        final $abstractvectortype$ quotient = lanewise(DIV, v, m);
        return quotient;
    }

    /**
     * Divides this vector by the broadcast of an input scalar,
     * selecting lane elements controlled by a mask.
     *
     * This is a masked lane-wise binary operation which applies
     * the primitive division operation ({@code /}) to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,$type$,VectorMask)
     *    lanewise}{@code (}{@link VectorOperators#DIV
     *    DIV}{@code , e, m)}.
     *
#if[FP]
     * @apiNote Because the underlying scalar operation is IEEE
     * floating point division, division by zero in fact will
     * not throw an exception, but will yield a signed
     * infinity or NaN.
#else[FP]
     * @apiNote If there is a zero divisor, {@code
     * ArithmeticException} will be thrown.
#end[FP]
     *
     * @param e the input scalar
     * @param m the mask controlling lane selection
     * @return the result of dividing each lane of this vector by the scalar
     * @see #div(Vector,VectorMask)
     * @see #broadcast($type$)
     * @see #div($type$)
     * @see VectorOperators#DIV
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,$type$)
     */
    @ForceInline
    public final
    $abstractvectortype$ div($type$ e, VectorMask<$Boxtype$> m) {
        // Masked scalar divide: DIV applied lane-wise against e with mask m.
        final $abstractvectortype$ quotient = lanewise(DIV, e, m);
        return quotient;
    }

    /// END OF FULL-SERVICE BINARY METHODS

    /// SECOND-TIER BINARY METHODS
    //
    // There are no masked versions.

    /**
     * {@inheritDoc} <!--workaround-->
#if[FP]
     * @apiNote
     * For this method, floating point negative
     * zero {@code -0.0} is treated as a value distinct from, and less
     * than the default value (positive zero).
#end[FP]
     */
    @Override
    @ForceInline
    public final
    $abstractvectortype$ min(Vector<$Boxtype$> v) {
        // Lane-wise minimum is the MIN operator applied against v.
        final $abstractvectortype$ smaller = lanewise(MIN, v);
        return smaller;
    }

    // FIXME:  "broadcast of an input scalar" is really wordy.  Reduce?
    /**
     * Computes the smaller of this vector and the broadcast of an input scalar.
     *
     * This is a lane-wise binary operation which applies the
     * operation {@code Math.min()} to each pair of
     * corresponding lane values.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,$type$)
     *    lanewise}{@code (}{@link VectorOperators#MIN
     *    MIN}{@code , e)}.
     *
     * @param e the input scalar
     * @return the lane-wise minimum of this vector and the given scalar
     * @see #min(Vector)
     * @see #broadcast($type$)
     * @see VectorOperators#MIN
     * @see #lanewise(VectorOperators.Binary,$type$,VectorMask)
#if[FP]
     * @apiNote
     * For this method, floating point negative
     * zero {@code -0.0} is treated as a value distinct from, and less
     * than the default value (positive zero).
#end[FP]
     */
    @ForceInline
    public final
    $abstractvectortype$ min($type$ e) {
        // Lane-wise minimum against the scalar e, via the MIN operator.
        final $abstractvectortype$ smaller = lanewise(MIN, e);
        return smaller;
    }

    /**
     * {@inheritDoc} <!--workaround-->
#if[FP]
     * @apiNote
     * For this method, floating point negative
     * zero {@code -0.0} is treated as a value distinct from, and less
     * than the default value (positive zero).
#end[FP]
     */
    @Override
    @ForceInline
    public final
    $abstractvectortype$ max(Vector<$Boxtype$> v) {
        // Lane-wise maximum is the MAX operator applied against v.
        final $abstractvectortype$ larger = lanewise(MAX, v);
        return larger;
    }

    /**
     * Computes the larger of this vector and the broadcast of an input scalar.
     *
     * This is a lane-wise binary operation which applies the
     * operation {@code Math.max()} to each pair of
     * corresponding lane values.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,$type$)
     *    lanewise}{@code (}{@link VectorOperators#MAX
     *    MAX}{@code , e)}.
     *
     * @param e the input scalar
     * @return the lane-wise maximum of this vector and the given scalar
     * @see #max(Vector)
     * @see #broadcast($type$)
     * @see VectorOperators#MAX
     * @see #lanewise(VectorOperators.Binary,$type$,VectorMask)
#if[FP]
     * @apiNote
     * For this method, floating point negative
     * zero {@code -0.0} is treated as a value distinct from, and less
     * than the default value (positive zero).
#end[FP]
     */
    @ForceInline
    public final
    $abstractvectortype$ max($type$ e) {
        // Lane-wise maximum against the scalar e, via the MAX operator.
        final $abstractvectortype$ larger = lanewise(MAX, e);
        return larger;
    }

#if[BITWISE]
    // common bitwise operators: and, or, not (with scalar versions)
    /**
     * Computes the bitwise logical conjunction ({@code &})
     * of this vector and a second input vector.
     *
     * This is a lane-wise binary operation which applies
     * the primitive bitwise "and" operation ({@code &})
     * to each pair of corresponding lane values.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,Vector)
     *    lanewise}{@code (}{@link VectorOperators#AND
     *    AND}{@code , v)}.
     *
     * <p>
     * This is not a full-service named operation like
     * {@link #add(Vector) add}.  A masked version of
     * this operation is not directly available
     * but may be obtained via the masked version of
     * {@code lanewise}.
     *
     * @param v a second input vector
     * @return the bitwise {@code &} of this vector and the second input vector
     * @see #and($type$)
     * @see #or(Vector)
     * @see #not()
     * @see VectorOperators#AND
     * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
     */
    @ForceInline
    public final
    $abstractvectortype$ and(Vector<$Boxtype$> v) {
        // Bitwise AND of corresponding lanes, via the AND operator.
        final $abstractvectortype$ conjunction = lanewise(AND, v);
        return conjunction;
    }

    /**
     * Computes the bitwise logical conjunction ({@code &})
     * of this vector and a scalar.
     *
     * This is a lane-wise binary operation which applies
     * the primitive bitwise "and" operation ({@code &})
     * to each pair of corresponding lane values.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,$type$)
     *    lanewise}{@code (}{@link VectorOperators#AND
     *    AND}{@code , e)}.
     *
     * @param e an input scalar
     * @return the bitwise {@code &} of this vector and scalar
     * @see #and(Vector)
     * @see VectorOperators#AND
     * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
     */
    @ForceInline
    public final
    $abstractvectortype$ and($type$ e) {
        // Bitwise AND of every lane with the scalar e, via the AND operator.
        final $abstractvectortype$ conjunction = lanewise(AND, e);
        return conjunction;
    }

    /**
     * Computes the bitwise logical disjunction ({@code |})
     * of this vector and a second input vector.
     *
     * This is a lane-wise binary operation which applies
     * the primitive bitwise "or" operation ({@code |})
     * to each pair of corresponding lane values.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,Vector)
     *    lanewise}{@code (}{@link VectorOperators#OR
     *    OR}{@code , v)}.
     *
     * <p>
     * This is not a full-service named operation like
     * {@link #add(Vector) add}.  A masked version of
     * this operation is not directly available
     * but may be obtained via the masked version of
     * {@code lanewise}.
     *
     * @param v a second input vector
     * @return the bitwise {@code |} of this vector and the second input vector
     * @see #or($type$)
     * @see #and(Vector)
     * @see #not()
     * @see VectorOperators#OR
     * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
     */
    @ForceInline
    public final
    $abstractvectortype$ or(Vector<$Boxtype$> v) {
        // Bitwise OR of corresponding lanes, via the OR operator.
        final $abstractvectortype$ disjunction = lanewise(OR, v);
        return disjunction;
    }

    /**
     * Computes the bitwise logical disjunction ({@code |})
     * of this vector and a scalar.
     *
     * This is a lane-wise binary operation which applies
     * the primitive bitwise "or" operation ({@code |})
     * to each pair of corresponding lane values.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,$type$)
     *    lanewise}{@code (}{@link VectorOperators#OR
     *    OR}{@code , e)}.
     *
     * @param e an input scalar
     * @return the bitwise {@code |} of this vector and scalar
     * @see #or(Vector)
     * @see VectorOperators#OR
     * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
     */
    @ForceInline
    public final
    $abstractvectortype$ or($type$ e) {
        // Bitwise OR of every lane with the scalar e, via the OR operator.
        final $abstractvectortype$ disjunction = lanewise(OR, e);
        return disjunction;
    }

#end[BITWISE]

#if[FP]
    // common FP operator: pow
    /**
     * Raises this vector to the power of a second input vector.
     *
     * This is a lane-wise binary operation which applies an operation
     * conforming to the specification of
     * {@link Math#pow Math.pow(a,b)}
     * to each pair of corresponding lane values.
#if[intOrFloat]
     * The operation is adapted to cast the operands and the result,
     * specifically widening {@code float} operands to {@code double}
     * operands and narrowing the {@code double} result to a {@code float}
     * result.
#end[intOrFloat]
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,Vector)
     *    lanewise}{@code (}{@link VectorOperators#POW
     *    POW}{@code , b)}.
     *
     * <p>
     * This is not a full-service named operation like
     * {@link #add(Vector) add}.  A masked version of
     * this operation is not directly available
     * but may be obtained via the masked version of
     * {@code lanewise}.
     *
     * @param b a vector exponent by which to raise this vector
     * @return the {@code b}-th power of this vector
     * @see #pow($type$)
     * @see VectorOperators#POW
     * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
     */
    @ForceInline
    public final
    $abstractvectortype$ pow(Vector<$Boxtype$> b) {
        // Lane-wise power with per-lane exponents taken from b, via POW.
        final $abstractvectortype$ raised = lanewise(POW, b);
        return raised;
    }

    /**
     * Raises this vector to a scalar power.
     *
     * This is a lane-wise binary operation which applies an operation
     * conforming to the specification of
     * {@link Math#pow Math.pow(a,b)}
     * to each pair of corresponding lane values.
#if[intOrFloat]
     * The operation is adapted to cast the operands and the result,
     * specifically widening {@code float} operands to {@code double}
     * operands and narrowing the {@code double} result to a {@code float}
     * result.
#end[intOrFloat]
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,$type$)
     *    lanewise}{@code (}{@link VectorOperators#POW
     *    POW}{@code , b)}.
     *
     * @param b a scalar exponent by which to raise this vector
     * @return the {@code b}-th power of this vector
     * @see #pow(Vector)
     * @see VectorOperators#POW
     * @see #lanewise(VectorOperators.Binary,$type$,VectorMask)
     */
    @ForceInline
    public final
    $abstractvectortype$ pow($type$ b) {
        // Lane-wise power with the single scalar exponent b, via POW.
        final $abstractvectortype$ raised = lanewise(POW, b);
        return raised;
    }
#end[FP]

    /// UNARY METHODS

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final $abstractvectortype$ neg() {
        // Negation is the unary NEG operator applied lane-wise.
        final $abstractvectortype$ negated = lanewise(NEG);
        return negated;
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final $abstractvectortype$ abs() {
        // Absolute value is the unary ABS operator applied lane-wise.
        final $abstractvectortype$ magnitude = lanewise(ABS);
        return magnitude;
    }

#if[!FP]
#if[!intOrLong]
    static int bitCount($type$ a) {
        // Keep only the low 16 (short) or low 8 bits of the widened value so
        // that sign-extension bits are not counted.
#if[short]
        final int laneBits = (int)a & 0xFFFF;
#else[short]
        final int laneBits = (int)a & 0xFF;
#end[short]
        return Integer.bitCount(laneBits);
    }
#end[!intOrLong]
#end[!FP]
#if[!FP]
#if[!intOrLong]
    static int numberOfTrailingZeros($type$ a) {
        // A zero value is answered with a fixed count (16 or 8) rather than
        // being passed on to the int-based routine.
        if (a == 0) {
#if[short]
            return 16;
#else[short]
            return 8;
#end[short]
        }
        return Integer.numberOfTrailingZeros(a);
    }
#end[!intOrLong]
#end[!FP]
#if[!FP]
#if[!intOrLong]
    static int numberOfLeadingZeros($type$ a) {
        // Negative values are answered with zero directly.
        if (a < 0) {
            return 0;
        }
        // Otherwise take the int-based count and subtract a fixed offset
        // (16 or 24) for the extra high-order bits of the widened value.
#if[short]
        return Integer.numberOfLeadingZeros(a) - 16;
#else[short]
        return Integer.numberOfLeadingZeros(a) - 24;
#end[short]
    }

    // Returns the argument with its bits in reverse order (per the method
    // name); the work is done with a rotate followed by mask-and-shift swaps.
    static $type$ reverse($type$ a) {
        // All-zero and all-one patterns are returned unchanged.
        if (a == 0 || a == -1) return a;

#if[short]
        // 16-bit variant. rotateLeft is defined outside this block; presumably
        // it rotates within the lane width, so rotating by 8 exchanges the two
        // bytes -- TODO confirm. The three steps that follow exchange adjacent
        // bits, adjacent bit pairs and adjacent nibbles respectively.
        $type$ b = rotateLeft(a, 8);
        b = ($type$) (((b & 0x5555) << 1) | ((b & 0xAAAA) >>> 1));
        b = ($type$) (((b & 0x3333) << 2) | ((b & 0xCCCC) >>> 2));
        b = ($type$) (((b & 0x0F0F) << 4) | ((b & 0xF0F0) >>> 4));
#else[short]
        // Variant for the remaining type (8-bit masks). Rotating by 4
        // presumably exchanges the two nibbles -- TODO confirm; the steps
        // that follow exchange adjacent bits and adjacent bit pairs.
        $type$ b = rotateLeft(a, 4);
        b = ($type$) (((b & 0x55) << 1) | ((b & 0xAA) >>> 1));
        b = ($type$) (((b & 0x33) << 2) | ((b & 0xCC) >>> 2));
#end[short]
        return b;
    }
#end[!intOrLong]
#end[!FP]

#if[BITWISE]
    // not (~)
    /**
     * Computes the bitwise logical complement ({@code ~})
     * of this vector.
     *
     * This is a lane-wise unary operation which applies
     * the primitive bitwise "not" operation ({@code ~})
     * to each lane value.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Unary)
     *    lanewise}{@code (}{@link VectorOperators#NOT
     *    NOT}{@code )}.
     *
     * <p>
     * This is not a full-service named operation like
     * {@link #add(Vector) add}.  A masked version of
     * this operation is not directly available
     * but may be obtained via the masked version of
     * {@code lanewise}.
     *
     * @return the bitwise complement {@code ~} of this vector
     * @see #and(Vector)
     * @see VectorOperators#NOT
     * @see #lanewise(VectorOperators.Unary,VectorMask)
     */
    @ForceInline
    public final
    $abstractvectortype$ not() {
        // Bitwise complement is the unary NOT operator applied lane-wise.
        final $abstractvectortype$ complement = lanewise(NOT);
        return complement;
    }
#end[BITWISE]

#if[FP]
    // sqrt
    /**
     * Computes the square root of this vector.
     *
     * This is a lane-wise unary operation which applies an operation
     * conforming to the specification of
     * {@link Math#sqrt Math.sqrt(a)}
     * to each lane value.
#if[intOrFloat]
     * The operation is adapted to cast the operand and the result,
     * specifically widening the {@code float} operand to a {@code double}
     * operand and narrowing the {@code double} result to a {@code float}
     * result.
#end[intOrFloat]
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Unary)
     *    lanewise}{@code (}{@link VectorOperators#SQRT
     *    SQRT}{@code )}.
     *
     * @return the square root of this vector
     * @see VectorOperators#SQRT
     * @see #lanewise(VectorOperators.Unary,VectorMask)
     */
    @ForceInline
    public final
    $abstractvectortype$ sqrt() {
        // Square root is the unary SQRT operator applied lane-wise.
        final $abstractvectortype$ root = lanewise(SQRT);
        return root;
    }
#end[FP]

    /// COMPARISONS

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final VectorMask<$Boxtype$> eq(Vector<$Boxtype$> v) {
        // Equality test is compare() with the EQ comparison.
        final VectorMask<$Boxtype$> equalLanes = compare(EQ, v);
        return equalLanes;
    }

    /**
     * Tests if this vector is equal to an input scalar.
     *
     * This is a lane-wise binary test operation which applies
     * the primitive equals operation ({@code ==}) to each lane.
     * The result is the same as {@code compare(VectorOperators.EQ, e)}.
     *
     * @param e the input scalar
     * @return the result mask of testing if this vector
     *         is equal to {@code e}
     * @see #compare(VectorOperators.Comparison,$type$)
     */
    @ForceInline
    public final VectorMask<$Boxtype$> eq($type$ e) {
        // Equality test against the scalar e is compare() with EQ.
        final VectorMask<$Boxtype$> equalLanes = compare(EQ, e);
        return equalLanes;
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final VectorMask<$Boxtype$> lt(Vector<$Boxtype$> v) {
        // Less-than test is compare() with the LT comparison.
        final VectorMask<$Boxtype$> lessLanes = compare(LT, v);
        return lessLanes;
    }

    /**
     * Tests if this vector is less than an input scalar.
     *
     * This is a lane-wise binary test operation which applies
     * the primitive less than operation ({@code <}) to each lane.
     * The result is the same as {@code compare(VectorOperators.LT, e)}.
     *
     * @param e the input scalar
     * @return the mask result of testing if this vector
     *         is less than the input scalar
     * @see #compare(VectorOperators.Comparison,$type$)
     */
    @ForceInline
    public final VectorMask<$Boxtype$> lt($type$ e) {
        // Less-than test against the scalar e is compare() with LT.
        final VectorMask<$Boxtype$> lessLanes = compare(LT, e);
        return lessLanes;
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    // Abstract: every concrete subclass supplies the body. The package-private
    // testTemplate(Class, Test) in this class holds the common logic,
    // presumably for those implementations to call.
    public abstract
    VectorMask<$Boxtype$> test(VectorOperators.Test op);

    /*package-private*/
    // Common logic for the unmasked test(op): builds the result mask for the
    // VO_SPECIAL test operators out of ordinary lane comparisons and casts it
    // to the requested maskType.
    @ForceInline
    final
    <M extends VectorMask<$Boxtype$>>
    M testTemplate(Class<M> maskType, Test op) {
        $Type$Species vsp = vspecies();
        if (opKind(op, VO_SPECIAL)) {
#if[FP]
            // Floating-point variants run every comparison below on the
            // integral view of the lanes rather than on the FP values.
            $Bitstype$Vector bits = this.viewAsIntegralLanes();
#end[FP]
            VectorMask<$Boxbitstype$> m;
            if (op == IS_DEFAULT) {
                // Equal to zero (for FP: on the integral view).
                m = {#if[FP]?bits.}compare(EQ, ($bitstype$) 0);
            } else if (op == IS_NEGATIVE) {
                // Less than zero (for FP: on the integral view).
                m = {#if[FP]?bits.}compare(LT, ($bitstype$) 0);
            }
#if[FP]
            else if (op == IS_FINITE ||
                     op == IS_NAN ||
                     op == IS_INFINITE) {
                // first kill the sign:
                bits = bits.and($Boxbitstype$.MAX_VALUE);
                // next find the bit pattern for infinity:
                $bitstype$ infbits = ($bitstype$) toBits($Boxtype$.POSITIVE_INFINITY);
                // now compare:
                // below the infinity pattern -> IS_FINITE, above it -> IS_NAN,
                // exactly equal -> IS_INFINITE.
                if (op == IS_FINITE) {
                    m = bits.compare(LT, infbits);
                } else if (op == IS_NAN) {
                    m = bits.compare(GT, infbits);
                } else {
                    m = bits.compare(EQ, infbits);
                }
            }
#end[FP]
            else {
                throw new AssertionError(op);
            }
            // FP variants first cast the integral-lane mask back to this
            // vector's species.
            return maskType.cast(m{#if[FP]?.cast(vsp)});
        }
        // Operator is not VO_SPECIAL: opCode(op) is still invoked (its result
        // is unused here; presumably it validates op -- TODO confirm) and the
        // method then always fails with AssertionError.
        int opc = opCode(op);
        throw new AssertionError(op);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    // Abstract: every concrete subclass supplies the body. The package-private
    // testTemplate(Class, Test, M) in this class holds the common masked
    // logic, presumably for those implementations to call.
    public abstract
    VectorMask<$Boxtype$> test(VectorOperators.Test op,
                                  VectorMask<$Boxtype$> m);

    /*package-private*/
    // Common logic for the masked test(op, m): same comparisons as the
    // unmasked template, but each one is issued as a masked compare so the
    // incoming mask restricts the result.
    @ForceInline
    final
    <M extends VectorMask<$Boxtype$>>
    M testTemplate(Class<M> maskType, Test op, M mask) {
        $Type$Species vsp = vspecies();
        // presumably verifies that the mask fits this vector -- TODO confirm
        mask.check(maskType, this);
        if (opKind(op, VO_SPECIAL)) {
#if[FP]
            // FP variants compare on the integral view of the lanes, so the
            // mask is cast to the integral species of the same shape.
            $Bitstype$Vector bits = this.viewAsIntegralLanes();
            VectorMask<$Boxbitstype$> m = mask.cast($Bitstype$Vector.species(shape()));
#else[FP]
            VectorMask<$Boxbitstype$> m = mask;
#end[FP]
            // From here on m is both the input mask of each compare and the
            // variable that receives its result.
            if (op == IS_DEFAULT) {
                m = {#if[FP]?bits.}compare(EQ, ($bitstype$) 0, m);
            } else if (op == IS_NEGATIVE) {
                m = {#if[FP]?bits.}compare(LT, ($bitstype$) 0, m);
            }
#if[FP]
            else if (op == IS_FINITE ||
                     op == IS_NAN ||
                     op == IS_INFINITE) {
                // first kill the sign:
                bits = bits.and($Boxbitstype$.MAX_VALUE);
                // next find the bit pattern for infinity:
                $bitstype$ infbits = ($bitstype$) toBits($Boxtype$.POSITIVE_INFINITY);
                // now compare:
                // below the infinity pattern -> IS_FINITE, above it -> IS_NAN,
                // exactly equal -> IS_INFINITE.
                if (op == IS_FINITE) {
                    m = bits.compare(LT, infbits, m);
                } else if (op == IS_NAN) {
                    m = bits.compare(GT, infbits, m);
                } else {
                    m = bits.compare(EQ, infbits, m);
                }
            }
#end[FP]
            else {
                throw new AssertionError(op);
            }
            // FP variants first cast the integral-lane mask back to this
            // vector's species.
            return maskType.cast(m{#if[FP]?.cast(vsp)});
        }
        // Operator is not VO_SPECIAL: opCode(op) is still invoked (its result
        // is unused here; presumably it validates op -- TODO confirm) and the
        // method then always fails with AssertionError.
        int opc = opCode(op);
        throw new AssertionError(op);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    // Abstract: every concrete subclass supplies the body. The package-private
    // compareTemplate(Class, Comparison, Vector) in this class holds the
    // common logic, presumably for those implementations to call.
    public abstract
    VectorMask<$Boxtype$> compare(VectorOperators.Comparison op, Vector<$Boxtype$> v);

    /*package-private*/
    // Common logic for the unmasked vector-vs-vector compare: translates op
    // to its opcode and hands the work to VectorSupport.compare.
    @ForceInline
    final
    <M extends VectorMask<$Boxtype$>>
    M compareTemplate(Class<M> maskType, Comparison op, Vector<$Boxtype$> v) {
        $abstractvectortype$ that = ($abstractvectortype$) v;
        // presumably verifies that both operands are compatible -- TODO confirm
        that.check(this);
        int opc = opCode(op);
        // No mask is supplied (null). The trailing lambda performs the same
        // comparison lane by lane through compareWithOp; presumably it is the
        // Java path used when the call is not intrinsified -- TODO confirm.
        return VectorSupport.compare(
            opc, getClass(), maskType, $type$.class, length(),
            this, that, null,
            (cond, v0, v1, m1) -> {
                AbstractMask<$Boxtype$> m
                    = v0.bTest(cond, v1, (cond_, i, a, b)
                               -> compareWithOp(cond, a, b));
                // Unchecked narrowing to the caller's mask type M.
                @SuppressWarnings("unchecked")
                M m2 = (M) m;
                return m2;
            });
    }

    /*package-private*/
    // Common logic for the masked vector-vs-vector compare: like the unmasked
    // template, but the mask m is forwarded to VectorSupport.compare.
    @ForceInline
    final
    <M extends VectorMask<$Boxtype$>>
    M compareTemplate(Class<M> maskType, Comparison op, Vector<$Boxtype$> v, M m) {
        $abstractvectortype$ that = ($abstractvectortype$) v;
        // presumably verify operand and mask compatibility -- TODO confirm
        that.check(this);
        m.check(maskType, this);
        int opc = opCode(op);
        // The trailing lambda computes the lane-by-lane comparison through
        // compareWithOp and then ANDs it with the mask it is given; presumably
        // it is the Java path used when the call is not intrinsified.
        return VectorSupport.compare(
            opc, getClass(), maskType, $type$.class, length(),
            this, that, m,
            (cond, v0, v1, m1) -> {
                AbstractMask<$Boxtype$> cmpM
                    = v0.bTest(cond, v1, (cond_, i, a, b)
                               -> compareWithOp(cond, a, b));
                // Unchecked narrowing to the caller's mask type M.
                @SuppressWarnings("unchecked")
                M m2 = (M) cmpM.and(m1);
                return m2;
            });
    }

    @ForceInline
    private static boolean compareWithOp(int cond, $type$ a, $type$ b) {
        // Evaluate the scalar comparison selected by the BT_* condition code;
        // an unrecognised code is reported as an AssertionError.
        final boolean holds = switch (cond) {
            // equality tests
            case BT_eq -> a == b;
            case BT_ne -> a != b;
            // ordering tests with the primitive relational operators
            case BT_lt -> a < b;
            case BT_le -> a <= b;
            case BT_gt -> a > b;
            case BT_ge -> a >= b;
#if[!FP]
            // unsigned ordering tests, present only in the non-FP variants
            case BT_ult -> $Boxtype$.compareUnsigned(a, b) < 0;
            case BT_ule -> $Boxtype$.compareUnsigned(a, b) <= 0;
            case BT_ugt -> $Boxtype$.compareUnsigned(a, b) > 0;
            case BT_uge -> $Boxtype$.compareUnsigned(a, b) >= 0;
#end[!FP]
            default -> throw new AssertionError();
        };
        return holds;
    }

    /**
     * Tests this vector by comparing it with an input scalar,
     * according to the given comparison operation.
     *
     * This is a lane-wise binary test operation which applies
     * the comparison operation to each lane.
     * <p>
     * The result is the same as
     * {@code compare(op, broadcast(species(), e))}.
     * That is, the scalar may be regarded as broadcast to
     * a vector of the same species, and then compared
     * against the original vector, using the selected
     * comparison operation.
     *
     * @param op the operation used to compare lane values
     * @param e the input scalar
     * @return the mask result of testing lane-wise if this vector
     *         compares to the input, according to the selected
     *         comparison operator
     * @see $abstractvectortype$#compare(VectorOperators.Comparison,Vector)
     * @see #eq($type$)
     * @see #lt($type$)
     */
    // Abstract: every concrete subclass supplies the body. The package-private
    // compareTemplate overload taking a scalar holds the common logic,
    // presumably for those implementations to call.
    public abstract
    VectorMask<$Boxtype$> compare(Comparison op, $type$ e);

    /*package-private*/
    @ForceInline
    final <M extends VectorMask<$Boxtype$>>
    M compareTemplate(Class<M> maskType, Comparison op, $type$ e) {
        // Broadcast the scalar and reuse the vector-vs-vector template.
        final M outcome = compareTemplate(maskType, op, broadcast(e));
        return outcome;
    }

    /**
     * Tests this vector by comparing it with an input scalar,
     * according to the given comparison operation,
     * in lanes selected by a mask.
     *
     * This is a masked lane-wise binary test operation which applies
     * the comparison operation to each pair of corresponding lane values.
     *
     * The returned result is equal to the expression
     * {@code compare(op,e).and(m)}.
     *
     * @param op the operation used to compare lane values
     * @param e the input scalar
     * @param m the mask controlling lane selection
     * @return the mask result of testing lane-wise if this vector
     *         compares to the input, according to the selected
     *         comparison operator,
     *         and only in the lanes selected by the mask
     * @see $abstractvectortype$#compare(VectorOperators.Comparison,Vector,VectorMask)
     */
    @ForceInline
    public final
    VectorMask<$Boxtype$> compare(VectorOperators.Comparison op,
                                  $type$ e,
                                  VectorMask<$Boxtype$> m) {
        // Broadcast the scalar and reuse the masked vector comparison.
        final VectorMask<$Boxtype$> outcome = compare(op, broadcast(e), m);
        return outcome;
    }

#if[!long]
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    // Abstract: every concrete subclass supplies the body. The package-private
    // compareTemplate overload taking a long holds the common logic,
    // presumably for those implementations to call.
    public abstract
    VectorMask<$Boxtype$> compare(Comparison op, long e);

    /*package-private*/
    @ForceInline
    final <M extends VectorMask<$Boxtype$>>
    M compareTemplate(Class<M> maskType, Comparison op, long e) {
        // Broadcast the long scalar and reuse the vector-vs-vector template.
        final M outcome = compareTemplate(maskType, op, broadcast(e));
        return outcome;
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final VectorMask<$Boxtype$> compare(Comparison op,
                                               long e,
                                               VectorMask<$Boxtype$> m) {
        // Broadcast the long scalar and reuse the masked vector comparison.
        final VectorMask<$Boxtype$> outcome = compare(op, broadcast(e), m);
        return outcome;
    }


#end[!long]

    /**
     * {@inheritDoc} <!--workaround-->
     */
    // Abstract: every concrete subclass supplies the body. The package-private
    // blendTemplate in this class holds the common logic, presumably for
    // those implementations to call.
    @Override public abstract
    $abstractvectortype$ blend(Vector<$Boxtype$> v, VectorMask<$Boxtype$> m);

    /*package-private*/
    // Common logic for blend(v, m): hands this vector, v and the mask to
    // VectorSupport.blend.
    @ForceInline
    final
    <M extends VectorMask<$Boxtype$>>
    $abstractvectortype$
    blendTemplate(Class<M> maskType, $abstractvectortype$ v, M m) {
        // presumably verifies that v is compatible with this vector -- TODO confirm
        v.check(this);
        // The trailing lambda runs a masked per-lane binary op whose lane
        // function always yields b, the lane taken from v; presumably it is
        // the Java path used when the call is not intrinsified -- TODO confirm.
        return VectorSupport.blend(
            getClass(), maskType, $type$.class, length(),
            this, v, m,
            (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b));
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    // Abstract: every concrete subclass supplies the body. The package-private
    // addIndexTemplate in this class holds the common logic, presumably for
    // those implementations to call.
    @Override public abstract $abstractvectortype$ addIndex(int scale);

    /*package-private*/
    // Common logic for addIndex(scale): validates the scale against the
    // species, then hands the work to VectorSupport.indexVector.
    @ForceInline
    final $abstractvectortype$ addIndexTemplate(int scale) {
        $Type$Species vsp = vspecies();
        // make sure VLENGTH*scale doesn't overflow:
        vsp.checkScale(scale);
        return VectorSupport.indexVector(
            getClass(), $type$.class, length(),
            this, scale, vsp,
            (v, scale_, s)
            -> {
                // If the platform doesn't support an INDEX
                // instruction directly, load IOTA from memory
                // and multiply.
                $abstractvectortype$ iota = s.iota();
                // The int scale is narrowed to the lane type; a scale of
                // exactly 1 skips the multiply and adds iota as-is.
                $type$ sc = ($type$) scale_;
                return v.add(sc == 1 ? iota : iota.mul(sc));
            });
    }

    /**
     * Replaces selected lanes of this vector with
     * a scalar value
     * under the control of a mask.
     *
     * This is a masked lane-wise binary operation which
     * selects each lane value from one or the other input.
     *
     * The returned result is equal to the expression
     * {@code blend(broadcast(e),m)}.
     *
     * @param e the input scalar, containing the replacement lane value
     * @param m the mask controlling lane selection of the scalar
     * @return the result of blending the lane elements of this vector with
     *         the scalar value
     */
    @ForceInline
    public final
    $abstractvectortype$ blend($type$ e, VectorMask<$Boxtype$> m) {
        // Broadcast the scalar and reuse the vector blend.
        final $abstractvectortype$ blended = blend(broadcast(e), m);
        return blended;
    }

#if[!long]
    /**
     * Replaces selected lanes of this vector with
     * a scalar value
     * under the control of a mask.
     *
     * This is a masked lane-wise binary operation which
     * selects each lane value from one or the other input.
     *
     * The returned result is equal to the expression
     * {@code blend(broadcast(e),m)}.
     *
     * @param e the input scalar, containing the replacement lane value
     * @param m the mask controlling lane selection of the scalar
     * @return the result of blending the lane elements of this vector with
     *         the scalar value
     */
    @ForceInline
    public final
    $abstractvectortype$ blend(long e, VectorMask<$Boxtype$> m) {
        // Broadcast the long scalar and reuse the vector blend.
        final $abstractvectortype$ blended = blend(broadcast(e), m);
        return blended;
    }
#end[!long]

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    // Abstract: every concrete subclass supplies the body. The package-private
    // sliceTemplate(int, Vector) in this class holds the common logic,
    // presumably for those implementations to call.
    public abstract
    $abstractvectortype$ slice(int origin, Vector<$Boxtype$> v1);

    /*package-private*/
    // Common logic for slice(origin, v1): rearranges both inputs with one
    // shuffle derived from origin and merges them under a lane-index mask.
    final
    @ForceInline
    $abstractvectortype$ sliceTemplate(int origin, Vector<$Boxtype$> v1) {
        $abstractvectortype$ that = ($abstractvectortype$) v1;
        // presumably verifies that both operands are compatible -- TODO confirm
        that.check(this);
        // origin must lie in [0, length()]; length() itself is accepted.
        Objects.checkIndex(origin, length() + 1);
        // presumably the lane indices 0, 1, 2, ... as integral lanes -- TODO confirm
        $Bitstype$Vector iotaVector = ($Bitstype$Vector) iotaShuffle().toBitsVector();
        // blendMask is set where the iota value is below length() - origin.
        // FP variants build the threshold in the integral species and cast
        // the resulting mask back to this vector's species.
#if[FP]
        $Bitstype$Vector filter = $Bitstype$Vector.broadcast(($Bitstype$Vector.$Bitstype$Species) vspecies().asIntegral(), ($bitstype$)(length() - origin));
        VectorMask<$Boxtype$> blendMask = iotaVector.compare(VectorOperators.LT, filter).cast(vspecies());
#else[FP]
        $abstractvectortype$ filter = broadcast(($type$)(length() - origin));
        VectorMask<$Boxtype$> blendMask = iotaVector.compare(VectorOperators.LT, filter);
#end[FP]
        // presumably a shuffle that starts at origin, steps by 1 and wraps -- TODO confirm
        AbstractShuffle<$Boxtype$> iota = iotaShuffle(origin, 1, true);
        // Both inputs are rearranged with the same shuffle; the rearranged
        // lanes of this vector are then blended into the rearranged lanes of
        // that vector under blendMask.
        return that.rearrange(iota).blend(this.rearrange(iota), blendMask);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final $abstractvectortype$ slice(int origin, Vector<$Boxtype$> w, VectorMask<$Boxtype$> m) {
        // Blend the unmasked slice into a broadcast of zero under the mask m.
        final $abstractvectortype$ outcome = broadcast(0).blend(slice(origin, w), m);
        return outcome;
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    // Abstract: every concrete subclass supplies the body. The package-private
    // sliceTemplate(int) in this class holds the common logic, presumably
    // for those implementations to call.
    public abstract
    $abstractvectortype$ slice(int origin);

    /*package-private*/
    // Common logic for the single-vector slice(origin): rearranges this
    // vector with a shuffle derived from origin and blends the result into
    // the species' zero vector under a lane-index mask.
    final
    @ForceInline
    $abstractvectortype$ sliceTemplate(int origin) {
        // origin must lie in [0, length()]; length() itself is accepted.
        Objects.checkIndex(origin, length() + 1);
        // presumably the lane indices 0, 1, 2, ... as integral lanes -- TODO confirm
        $Bitstype$Vector iotaVector = ($Bitstype$Vector) iotaShuffle().toBitsVector();
        // blendMask is set where the iota value is below length() - origin.
        // FP variants build the threshold in the integral species and cast
        // the resulting mask back to this vector's species.
#if[FP]
        $Bitstype$Vector filter = $Bitstype$Vector.broadcast(($Bitstype$Vector.$Bitstype$Species) vspecies().asIntegral(), ($bitstype$)(length() - origin));
        VectorMask<$Boxtype$> blendMask = iotaVector.compare(VectorOperators.LT, filter).cast(vspecies());
#else[FP]
        $abstractvectortype$ filter = broadcast(($type$)(length() - origin));
        VectorMask<$Boxtype$> blendMask = iotaVector.compare(VectorOperators.LT, filter);
#end[FP]
        // presumably a shuffle that starts at origin, steps by 1 and wraps -- TODO confirm
        AbstractShuffle<$Boxtype$> iota = iotaShuffle(origin, 1, true);
        // The rearranged lanes of this vector are blended into
        // vspecies().zero() under blendMask.
        return vspecies().zero().blend(this.rearrange(iota), blendMask);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    // Abstract here; unsliceTemplate(int, Vector, int) below is the
    // package-private template that takes the same parameters.
    @Override
    public abstract
    $abstractvectortype$ unslice(int origin, Vector<$Boxtype$> w, int part);

    /*package-private*/
    final
    @ForceInline
    $abstractvectortype$
    unsliceTemplate(int origin, Vector<$Boxtype$> w, int part) {
        $abstractvectortype$ that = ($abstractvectortype$) w;
        that.check(this);
        // origin may be anything from 0 up to and including length().
        Objects.checkIndex(origin, length() + 1);
        // Only parts 0 and 1 exist.  Without this check every non-zero part
        // number would silently be handled as part 1 by the operator
        // selection below, instead of being rejected as Vector.unslice
        // specifies.
        if (part != 0 && part != 1) {
            throw wrongPartForSlice(part);
        }
        $Bitstype$Vector iotaVector = ($Bitstype$Vector) iotaShuffle().toBitsVector();
        // Part 0 selects the lanes at or above origin (GE); part 1 selects
        // the lanes below origin (LT).
#if[FP]
        $Bitstype$Vector filter = $Bitstype$Vector.broadcast(($Bitstype$Vector.$Bitstype$Species) vspecies().asIntegral(), ($bitstype$)origin);
        VectorMask<$Boxtype$> blendMask = iotaVector.compare((part == 0) ? VectorOperators.GE : VectorOperators.LT, filter).cast(vspecies());
#else[FP]
        $abstractvectortype$ filter = broadcast(($type$)origin);
        VectorMask<$Boxtype$> blendMask = iotaVector.compare((part == 0) ? VectorOperators.GE : VectorOperators.LT, filter);
#end[FP]
        // Permute this vector with the wrapping iota shuffle built from
        // -origin, then blend it into the background vector under the mask.
        AbstractShuffle<$Boxtype$> iota = iotaShuffle(-origin, 1, true);
        return that.blend(this.rearrange(iota), blendMask);
    }

    /*package-private*/
    final
    @ForceInline
    <M extends VectorMask<$Boxtype$>>
    $abstractvectortype$
    unsliceTemplate(Class<M> maskType, int origin, Vector<$Boxtype$> w, int part, M m) {
        $abstractvectortype$ background = ($abstractvectortype$) w;
        background.check(this);
        // Three steps: slice the background against itself, blend this
        // vector into that slice under m, then unslice the result into w.
        return background.sliceTemplate(origin, background)
                         .blendTemplate(maskType, this, m)
                         .unsliceTemplate(origin, w, part);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    // Abstract here; the unsliceTemplate overload above that additionally
    // takes the mask class matches this signature.
    @Override
    public abstract
    $abstractvectortype$ unslice(int origin, Vector<$Boxtype$> w, int part, VectorMask<$Boxtype$> m);

    /**
     * {@inheritDoc} <!--workaround-->
     */
    // Abstract here; unsliceTemplate(int) below is the package-private
    // template that takes the same parameter.
    @Override
    public abstract
    $abstractvectortype$ unslice(int origin);

    /*package-private*/
    final
    @ForceInline
    $abstractvectortype$
    unsliceTemplate(int origin) {
        // origin may be anything from 0 up to and including length().
        Objects.checkIndex(origin, length() + 1);
        // The selection mask marks the iota lanes that are >= origin.
        $Bitstype$Vector laneIota = ($Bitstype$Vector) iotaShuffle().toBitsVector();
#if[FP]
        $Bitstype$Vector bound = $Bitstype$Vector.broadcast(($Bitstype$Vector.$Bitstype$Species) vspecies().asIntegral(), ($bitstype$)origin);
        VectorMask<$Boxtype$> selector = laneIota.compare(VectorOperators.GE, bound).cast(vspecies());
#else[FP]
        $abstractvectortype$ bound = broadcast(($type$)origin);
        VectorMask<$Boxtype$> selector = laneIota.compare(VectorOperators.GE, bound);
#end[FP]
        // Permute this vector with the wrapping iota shuffle built from
        // -origin and blend it into a zero vector under the mask.
        AbstractShuffle<$Boxtype$> permutation = iotaShuffle(-origin, 1, true);
        $abstractvectortype$ zeros = vspecies().zero();
        $abstractvectortype$ permuted = this.rearrange(permutation);
        return zeros.blend(permuted, selector);
    }

    // Creates (but does not throw) the exception that reports an invalid
    // part number.
    private ArrayIndexOutOfBoundsException
    wrongPartForSlice(int part) {
        return new ArrayIndexOutOfBoundsException(
            String.format("bad part number %d for slice operation", part));
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    // Abstract here; rearrangeTemplate(Class, S) below is the package-private
    // template for a single shuffle argument.
    @Override
    public abstract
    $abstractvectortype$ rearrange(VectorShuffle<$Boxtype$> shuffle);

    /*package-private*/
    @ForceInline
    final
    <S extends VectorShuffle<$Boxtype$>>
    $abstractvectortype$ rearrangeTemplate(Class<S> shuffletype, S shuffle) {
        Objects.requireNonNull(shuffle);
        // The lambda is the Java implementation handed to rearrangeOp: each
        // result lane reads the lane named by the shuffle, with the source
        // index reduced modulo the vector length (unsigned).
        return VectorSupport.rearrangeOp(
            getClass(), shuffletype, null, $type$.class, length(),
            this, shuffle, null,
            (vec, shuf, unusedMask) -> vec.uOp((lane, ignored) ->
                vec.lane(Integer.remainderUnsigned(shuf.laneSource(lane), vec.length()))));
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    // Abstract here; rearrangeTemplate(Class, Class, S, M) below is the
    // package-private template for the masked form.
    @Override
    public abstract
    $abstractvectortype$ rearrange(VectorShuffle<$Boxtype$> s,
                                   VectorMask<$Boxtype$> m);

    /*package-private*/
    @ForceInline
    final
    <S extends VectorShuffle<$Boxtype$>, M extends VectorMask<$Boxtype$>>
    $abstractvectortype$ rearrangeTemplate(Class<S> shuffletype,
                                           Class<M> masktype,
                                           S shuffle,
                                           M m) {
        Objects.requireNonNull(shuffle);
        m.check(masktype, this);
        // Java implementation handed to rearrangeOp: lanes whose mask bit is
        // unset become zero, the others read the lane named by the shuffle.
        return VectorSupport.rearrangeOp(
                   getClass(), shuffletype, masktype, $type$.class, length(),
                   this, shuffle, m,
                   (vec, shuf, msk) -> vec.uOp((lane, ignored) -> {
                        // The source index is computed before the mask is consulted.
                        int source = Integer.remainderUnsigned(shuf.laneSource(lane), vec.length());
                        return msk.laneIsSet(lane) ? vec.lane(source) : 0;
                   }));
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    // Abstract here; the rearrangeTemplate overload below that takes a
    // second vector is the package-private template for this form.
    @Override
    public abstract
    $abstractvectortype$ rearrange(VectorShuffle<$Boxtype$> s,
                                   Vector<$Boxtype$> v);

    /*package-private*/
    @ForceInline
    final
    <S extends VectorShuffle<$Boxtype$>>
    $abstractvectortype$ rearrangeTemplate(Class<S> shuffletype,
                                           S shuffle,
                                           $abstractvectortype$ v) {
        // The same shuffle is applied to this vector and to v; the shuffle's
        // laneIsValid() mask then decides, lane by lane, which of the two
        // results is kept in the final blend.
        VectorMask<$Boxtype$> validLanes = shuffle.laneIsValid();
        $abstractvectortype$ fromThis =
            VectorSupport.rearrangeOp(
                getClass(), shuffletype, null, $type$.class, length(),
                this, shuffle, null,
                (vec, shuf, unusedMask) -> vec.uOp((lane, ignored) ->
                    vec.lane(Integer.remainderUnsigned(shuf.laneSource(lane), vec.length()))));
        $abstractvectortype$ fromOther =
            VectorSupport.rearrangeOp(
                getClass(), shuffletype, null, $type$.class, length(),
                v, shuffle, null,
                (vec, shuf, unusedMask) -> vec.uOp((lane, ignored) ->
                    vec.lane(Integer.remainderUnsigned(shuf.laneSource(lane), vec.length()))));
        return fromOther.blend(fromThis, validLanes);
    }

    // Converts the lane values of this vector into shuffle source indexes
    // for species dsp.  The floating-point variant is not expected to be
    // reached and fails unconditionally.
    @Override
    @ForceInline
    final <F> VectorShuffle<F> bitsToShuffle0(AbstractSpecies<F> dsp) {
#if[FP]
        throw new AssertionError();
#else[FP]
        assert(dsp.length() == vspecies().length());
        $type$[] lanes = toArray();
        int[] sources = new int[lanes.length];
        int next = 0;
        for ($type$ lane : lanes) {
            sources[next++] = (int) lane;
        }
        return VectorShuffle.fromArray(dsp, sources, 0);
#end[FP]
    }

    // Builds a shuffle for species dsp from the lane values of this vector.
    // Every index is first masked with (length() - 1).  When wrap is false,
    // lanes whose masked value differs from the original index have length()
    // subtracted from the masked value instead.
    @ForceInline
    final <F>
    VectorShuffle<F> toShuffle(AbstractSpecies<F> dsp, boolean wrap) {
        assert(dsp.elementSize() == vspecies().elementSize());
#if[float]
        IntVector laneIndexes = convert(VectorOperators.F2I, 0).reinterpretAsInts();
#end[float]
#if[double]
        LongVector laneIndexes = convert(VectorOperators.D2L, 0).reinterpretAsLongs();
#end[double]
#if[!FP]
        $Type$Vector laneIndexes = this;
#end[!FP]
        $Bitstype$Vector masked = laneIndexes.lanewise(VectorOperators.AND, length() - 1);
        if (wrap) {
            return masked.bitsToShuffle(dsp);
        }
        $Bitstype$Vector exceptional = masked.lanewise(VectorOperators.SUB, length());
        VectorMask<$Boxbitstype$> unchanged = masked.compare(VectorOperators.EQ, laneIndexes);
        return exceptional.blend(masked, unchanged).bitsToShuffle(dsp);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @since 19
     */
    // Abstract here; compressTemplate(Class, M) below is the package-private
    // template for this operation.
    @Override
    public abstract
    $Type$Vector compress(VectorMask<$Boxtype$> m);

    /*package-private*/
    @ForceInline
    final
    <M extends AbstractMask<$Boxtype$>>
    $Type$Vector compressTemplate(Class<M> masktype, M m) {
        m.check(masktype, this);
        // compressHelper is the Java implementation handed to compressExpandOp.
        return ($Type$Vector) VectorSupport.compressExpandOp(
            VectorSupport.VECTOR_OP_COMPRESS, getClass(), masktype,
            $type$.class, length(), this, m,
            (vec, msk) -> compressHelper(vec, msk));
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @since 19
     */
    // Abstract here; expandTemplate(Class, M) below is the package-private
    // template for this operation.
    @Override
    public abstract
    $Type$Vector expand(VectorMask<$Boxtype$> m);

    /*package-private*/
    @ForceInline
    final
    <M extends AbstractMask<$Boxtype$>>
    $Type$Vector expandTemplate(Class<M> masktype, M m) {
        m.check(masktype, this);
        // expandHelper is the Java implementation handed to compressExpandOp.
        return ($Type$Vector) VectorSupport.compressExpandOp(
            VectorSupport.VECTOR_OP_EXPAND, getClass(), masktype,
            $type$.class, length(), this, m,
            (vec, msk) -> expandHelper(vec, msk));
    }


    /**
     * {@inheritDoc} <!--workaround-->
     */
    // Abstract here; the one-argument selectFromTemplate below is the
    // package-private template for this form.
    @Override
    public abstract
    $abstractvectortype$ selectFrom(Vector<$Boxtype$> v);

    /*package-private*/
    @ForceInline
    final $abstractvectortype$ selectFromTemplate($abstractvectortype$ v) {
        // Java implementation handed to selectFromOp: the first vector is
        // turned into a shuffle and applied to the second.
        return ($Type$Vector)VectorSupport.selectFromOp(
            getClass(), null, $type$.class,
            length(), this, v, null,
            (idx, src, unusedMask) -> src.rearrange(idx.toShuffle()));
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    // Abstract here; the selectFromTemplate overload below that also takes
    // the mask class and mask is the package-private template for this form.
    @Override
    public abstract
    $abstractvectortype$ selectFrom(Vector<$Boxtype$> s, VectorMask<$Boxtype$> m);

    /*package-private*/
    @ForceInline
    final
    <M extends VectorMask<$Boxtype$>>
    $abstractvectortype$ selectFromTemplate($abstractvectortype$ v,
                                            Class<M> masktype, M m) {
        m.check(masktype, this);
        // Java implementation handed to selectFromOp: the first vector is
        // turned into a shuffle and applied to the second under the mask.
        return ($Type$Vector)VectorSupport.selectFromOp(
            getClass(), masktype, $type$.class,
            length(), this, v, m,
            (idx, src, msk) -> src.rearrange(idx.toShuffle(), msk));
    }


    /**
     * {@inheritDoc} <!--workaround-->
     */
    // Abstract here; the two-vector selectFromTemplate below is the
    // package-private template for this form.
    @Override
    public abstract
    $abstractvectortype$ selectFrom(Vector<$Boxtype$> v1, Vector<$Boxtype$> v2);


    /*package-private*/
    @ForceInline
    final $abstractvectortype$ selectFromTemplate($abstractvectortype$ v1, $abstractvectortype$ v2) {
        // selectFromTwoVectorHelper is the Java implementation handed to
        // selectFromTwoVectorOp; it receives this vector, v1 and v2.
        return VectorSupport.selectFromTwoVectorOp(
            getClass(), $type$.class, length(), this, v1, v2,
            (a, b, c) -> selectFromTwoVectorHelper(a, b, c));
    }

    /// Ternary operations

#if[BITWISE]
    /**
     * Blends together the bits of two vectors under
     * the control of a third, which supplies mask bits.
     *
     * This is a lane-wise ternary operation which performs
     * a bitwise blending operation {@code (a&~c)|(b&c)}
     * to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
     *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
     *    BITWISE_BLEND}{@code , bits, mask)}.
     *
     * @param bits input bits to blend into the current vector
     * @param mask a bitwise mask to enable blending of the input bits
     * @return the bitwise blend of the given bits into the current vector,
     *         under control of the bitwise mask
     * @see #bitwiseBlend($type$,$type$)
     * @see #bitwiseBlend($type$,Vector)
     * @see #bitwiseBlend(Vector,$type$)
     * @see VectorOperators#BITWISE_BLEND
     * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
     */
    @ForceInline
    public final
    $abstractvectortype$ bitwiseBlend(Vector<$Boxtype$> bits, Vector<$Boxtype$> mask) {
        // Plain delegation to the ternary lanewise operation.
        return this.lanewise(VectorOperators.BITWISE_BLEND, bits, mask);
    }

    /**
     * Blends together the bits of a vector and a scalar under
     * the control of another scalar, which supplies mask bits.
     *
     * This is a lane-wise ternary operation which performs
     * a bitwise blending operation {@code (a&~c)|(b&c)}
     * to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
     *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
     *    BITWISE_BLEND}{@code , bits, mask)}.
     *
     * @param bits input bits to blend into the current vector
     * @param mask a bitwise mask to enable blending of the input bits
     * @return the bitwise blend of the given bits into the current vector,
     *         under control of the bitwise mask
     * @see #bitwiseBlend(Vector,Vector)
     * @see VectorOperators#BITWISE_BLEND
     * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask)
     */
    @ForceInline
    public final
    $abstractvectortype$ bitwiseBlend($type$ bits, $type$ mask) {
        // Plain delegation to the ternary lanewise operation (scalar, scalar).
        return this.lanewise(VectorOperators.BITWISE_BLEND, bits, mask);
    }

    /**
     * Blends together the bits of a vector and a scalar under
     * the control of another vector, which supplies mask bits.
     *
     * This is a lane-wise ternary operation which performs
     * a bitwise blending operation {@code (a&~c)|(b&c)}
     * to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
     *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
     *    BITWISE_BLEND}{@code , bits, mask)}.
     *
     * @param bits input bits to blend into the current vector
     * @param mask a bitwise mask to enable blending of the input bits
     * @return the bitwise blend of the given bits into the current vector,
     *         under control of the bitwise mask
     * @see #bitwiseBlend(Vector,Vector)
     * @see VectorOperators#BITWISE_BLEND
     * @see #lanewise(VectorOperators.Ternary,$type$,Vector,VectorMask)
     */
    @ForceInline
    public final
    $abstractvectortype$ bitwiseBlend($type$ bits, Vector<$Boxtype$> mask) {
        // Plain delegation to the ternary lanewise operation (scalar, vector).
        return this.lanewise(VectorOperators.BITWISE_BLEND, bits, mask);
    }

    /**
     * Blends together the bits of two vectors under
     * the control of a scalar, which supplies mask bits.
     *
     * This is a lane-wise ternary operation which performs
     * a bitwise blending operation {@code (a&~c)|(b&c)}
     * to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
     *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
     *    BITWISE_BLEND}{@code , bits, mask)}.
     *
     * @param bits input bits to blend into the current vector
     * @param mask a bitwise mask to enable blending of the input bits
     * @return the bitwise blend of the given bits into the current vector,
     *         under control of the bitwise mask
     * @see #bitwiseBlend(Vector,Vector)
     * @see VectorOperators#BITWISE_BLEND
     * @see #lanewise(VectorOperators.Ternary,Vector,$type$,VectorMask)
     */
    @ForceInline
    public final
    $abstractvectortype$ bitwiseBlend(Vector<$Boxtype$> bits, $type$ mask) {
        // Plain delegation to the ternary lanewise operation (vector, scalar).
        return this.lanewise(VectorOperators.BITWISE_BLEND, bits, mask);
    }
#end[BITWISE]

#if[FP]
    /**
     * Multiplies this vector by a second input vector, and sums
     * the result with a third.
     *
     * Extended precision is used for the intermediate result,
     * avoiding possible loss of precision from rounding once
     * for each of the two operations.
     * The result is numerically close to {@code this.mul(b).add(c)},
     * and is typically closer to the true mathematical result.
     *
     * This is a lane-wise ternary operation which applies an operation
     * conforming to the specification of
     * {@link Math#fma($type$,$type$,$type$) Math.fma(a,b,c)}
     * to each lane.
#if[intOrFloat]
     * The operation is adapted to cast the operands and the result,
     * specifically widening {@code float} operands to {@code double}
     * operands and narrowing the {@code double} result to a {@code float}
     * result.
#end[intOrFloat]
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
     *    lanewise}{@code (}{@link VectorOperators#FMA
     *    FMA}{@code , b, c)}.
     *
     * @param b the second input vector, supplying multiplier values
     * @param c the third input vector, supplying addend values
     * @return the product of this vector and the second input vector
     *         summed with the third input vector, using extended precision
     *         for the intermediate result
     * @see #fma($type$,$type$)
     * @see VectorOperators#FMA
     * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
     */
    @ForceInline
    public final
    $abstractvectortype$ fma(Vector<$Boxtype$> b, Vector<$Boxtype$> c) {
        // Plain delegation to the ternary lanewise operation.
        return this.lanewise(VectorOperators.FMA, b, c);
    }

    /**
     * Multiplies this vector by a scalar multiplier, and sums
     * the result with a scalar addend.
     *
     * Extended precision is used for the intermediate result,
     * avoiding possible loss of precision from rounding once
     * for each of the two operations.
     * The result is numerically close to {@code this.mul(b).add(c)},
     * and is typically closer to the true mathematical result.
     *
     * This is a lane-wise ternary operation which applies an operation
     * conforming to the specification of
     * {@link Math#fma($type$,$type$,$type$) Math.fma(a,b,c)}
     * to each lane.
#if[intOrFloat]
     * The operation is adapted to cast the operands and the result,
     * specifically widening {@code float} operands to {@code double}
     * operands and narrowing the {@code double} result to a {@code float}
     * result.
#end[intOrFloat]
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
     *    lanewise}{@code (}{@link VectorOperators#FMA
     *    FMA}{@code , b, c)}.
     *
     * @param b the scalar multiplier
     * @param c the scalar addend
     * @return the product of this vector and the scalar multiplier
     *         summed with scalar addend, using extended precision
     *         for the intermediate result
     * @see #fma(Vector,Vector)
     * @see VectorOperators#FMA
     * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask)
     */
    @ForceInline
    public final
    $abstractvectortype$ fma($type$ b, $type$ c) {
        // Plain delegation to the ternary lanewise operation (scalar, scalar).
        return this.lanewise(VectorOperators.FMA, b, c);
    }

    // Don't bother with (Vector,$type$) and ($type$,Vector) overloadings.
#end[FP]

    // Type specific horizontal reductions

    /**
     * Returns a value accumulated from all the lanes of this vector.
     *
     * This is an associative cross-lane reduction operation which
     * applies the specified operation to all the lane elements.
     * <p>
     * A few reduction operations do not support arbitrary reordering
     * of their operands, yet are included here because of their
     * usefulness.
     * <ul>
     * <li>
     * In the case of {@code FIRST_NONZERO}, the reduction returns
     * the value from the lowest-numbered non-zero lane.
#if[FP]
     * (As with {@code MAX} and {@code MIN}, floating point negative
     * zero {@code -0.0} is treated as a value distinct from
     * the default value, positive zero. So a first-nonzero lane reduction
     * might return {@code -0.0} even in the presence of non-zero
     * lane values.)
     * <li>
     * In the case of {@code ADD} and {@code MUL}, the
     * precise result will reflect the choice of an arbitrary order
     * of operations, which may even vary over time.
     * For further details see the section
     * <a href="VectorOperators.html#fp_assoc">Operations on floating point vectors</a>.
#end[FP]
     * <li>
     * All other reduction operations are fully commutative and
     * associative.  The implementation can choose any order of
     * processing, yet it will always produce the same result.
     * </ul>
     *
     * @param op the operation used to combine lane values
     * @return the accumulated result
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #reduceLanes(VectorOperators.Associative,VectorMask)
     * @see #add(Vector)
     * @see #mul(Vector)
     * @see #min(Vector)
     * @see #max(Vector)
#if[BITWISE]
     * @see #and(Vector)
     * @see #or(Vector)
     * @see VectorOperators#XOR
#end[BITWISE]
     * @see VectorOperators#FIRST_NONZERO
     */
    // Abstract here; reduceLanesTemplate(VectorOperators.Associative) below
    // is the package-private template that takes the same parameter.
    public abstract $type$ reduceLanes(VectorOperators.Associative op);

    /**
     * Returns a value accumulated from selected lanes of this vector,
     * controlled by a mask.
     *
     * This is an associative cross-lane reduction operation which
     * applies the specified operation to the selected lane elements.
     * <p>
     * If no elements are selected, an operation-specific identity
     * value is returned.
     * <ul>
     * <li>
     * If the operation is
#if[BITWISE]
     *  {@code ADD}, {@code XOR}, {@code OR},
#else[BITWISE]
     *  {@code ADD}
#end[BITWISE]
     * or {@code FIRST_NONZERO},
     * then the identity value is {#if[FP]?positive }zero, the default {@code $type$} value.
     * <li>
     * If the operation is {@code MUL},
     * then the identity value is one.
#if[BITWISE]
     * <li>
     * If the operation is {@code AND},
     * then the identity value is minus one (all bits set).
     * <li>
     * If the operation is {@code MAX},
     * then the identity value is {@code $Boxtype$.MIN_VALUE}.
     * <li>
     * If the operation is {@code MIN},
     * then the identity value is {@code $Boxtype$.MAX_VALUE}.
#end[BITWISE]
#if[FP]
     * <li>
     * If the operation is {@code MAX},
     * then the identity value is {@code $Boxtype$.NEGATIVE_INFINITY}.
     * <li>
     * If the operation is {@code MIN},
     * then the identity value is {@code $Boxtype$.POSITIVE_INFINITY}.
#end[FP]
     * </ul>
     * <p>
     * A few reduction operations do not support arbitrary reordering
     * of their operands, yet are included here because of their
     * usefulness.
     * <ul>
     * <li>
     * In the case of {@code FIRST_NONZERO}, the reduction returns
     * the value from the lowest-numbered non-zero lane.
#if[FP]
     * (As with {@code MAX} and {@code MIN}, floating point negative
     * zero {@code -0.0} is treated as a value distinct from
     * the default value, positive zero. So a first-nonzero lane reduction
     * might return {@code -0.0} even in the presence of non-zero
     * lane values.)
     * <li>
     * In the case of {@code ADD} and {@code MUL}, the
     * precise result will reflect the choice of an arbitrary order
     * of operations, which may even vary over time.
     * For further details see the section
     * <a href="VectorOperators.html#fp_assoc">Operations on floating point vectors</a>.
#end[FP]
     * <li>
     * All other reduction operations are fully commutative and
     * associative.  The implementation can choose any order of
     * processing, yet it will always produce the same result.
     * </ul>
     *
     * @param op the operation used to combine lane values
     * @param m the mask controlling lane selection
     * @return the reduced result accumulated from the selected lane values
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #reduceLanes(VectorOperators.Associative)
     */
    // Abstract here; the reduceLanesTemplate overload below that also takes
    // the mask class is the package-private template for the masked form.
    public abstract $type$ reduceLanes(VectorOperators.Associative op,
                                       VectorMask<$Boxtype$> m);

    /*package-private*/
    @ForceInline
    final
    $type$ reduceLanesTemplate(VectorOperators.Associative op,
                               Class<? extends VectorMask<$Boxtype$>> maskClass,
                               VectorMask<$Boxtype$> m) {
        m.check(maskClass, this);
        if (op != FIRST_NONZERO) {
            // General case: look up the Java fallback for the opcode and
            // hand it, together with the mask, to reductionCoerced.
            int opcode = opCode(op);
            return fromBits(VectorSupport.reductionCoerced(
                opcode, getClass(), maskClass, $type$.class, length(),
                this, m,
                REDUCE_IMPL.find(op, opcode, $abstractvectortype$::reductionOperations)));
        }
        // FIXME:  The JIT should handle this.
        // FIRST_NONZERO: blend this vector into a zero broadcast under m,
        // then run the unmasked reduction on the result.
        $abstractvectortype$ blended = broadcast(($type$) 0).blend(this, m);
        return blended.reduceLanesTemplate(op);
    }

    /*package-private*/
    @ForceInline
    final
    $type$ reduceLanesTemplate(VectorOperators.Associative op) {
        if (op != FIRST_NONZERO) {
            // General case: look up the Java fallback for the opcode and
            // hand it to reductionCoerced without a mask.
            int opcode = opCode(op);
            return fromBits(VectorSupport.reductionCoerced(
                opcode, getClass(), null, $type$.class, length(),
                this, null,
                REDUCE_IMPL.find(op, opcode, $abstractvectortype$::reductionOperations)));
        }
        // FIXME:  The JIT should handle this.
        // FIRST_NONZERO: find the first lane whose integral bits are not
        // zero; if firstTrue() is not below length(), the result is zero.
        VectorMask<$Boxbitstype$> nonZeroLanes
            = this.viewAsIntegralLanes().compare(NE, ($bitstype$) 0);
        int first = nonZeroLanes.firstTrue();
        if (first >= length()) {
            return ($type$) 0;
        }
        return this.lane(first);
    }

    // Cache consulted by both reduceLanesTemplate overloads, which call
    // REDUCE_IMPL.find(op, opc, ...) with reductionOperations as the
    // supplier of the per-opcode implementation.
    private static final
    ImplCache<Associative, ReductionOperation<$abstractvectortype$, VectorMask<$Boxtype$>>>
        REDUCE_IMPL = new ImplCache<>(Associative.class, $Type$Vector.class);

    // Supplies the Java fallback for the reduction identified by opc_, or
    // null when the opcode has no entry here.  Every fallback folds the
    // lanes with rOp; the first rOp argument is used here as the value the
    // fold starts from (0 for ADD/OR/XOR, 1 for MUL, all ones for AND), so
    // it has to be an identity of the operation being applied.
    private static ReductionOperation<$abstractvectortype$, VectorMask<$Boxtype$>> reductionOperations(int opc_) {
        switch (opc_) {
            case VECTOR_OP_ADD: return (v, m) ->
                    toBits(v.rOp(($type$)0, m, (i, a, b) -> ($type$)(a + b)));
            case VECTOR_OP_MUL: return (v, m) ->
                    toBits(v.rOp(($type$)1, m, (i, a, b) -> ($type$)(a * b)));
            case VECTOR_OP_MIN: return (v, m) ->
                    toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> ($type$) Math.min(a, b)));
            case VECTOR_OP_MAX: return (v, m) ->
                    toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> ($type$) Math.max(a, b)));
#if[!FP]
            // The unsigned reductions need unsigned identities: all bits set
            // (the largest unsigned value) for UMIN and zero for UMAX.  The
            // signed MAX_OR_INF/MIN_OR_INF values are not identities here:
            // starting UMIN from the signed maximum gives a wrong result
            // when every lane has its sign bit set, and starting UMAX from
            // the signed minimum gives a wrong result when no lane has.
            case VECTOR_OP_UMIN: return (v, m) ->
                    toBits(v.rOp(($type$)-1, m, (i, a, b) -> ($type$) VectorMath.minUnsigned(a, b)));
            case VECTOR_OP_UMAX: return (v, m) ->
                    toBits(v.rOp(($type$)0, m, (i, a, b) -> ($type$) VectorMath.maxUnsigned(a, b)));
#end[!FP]
#if[BITWISE]
            case VECTOR_OP_AND: return (v, m) ->
                    toBits(v.rOp(($type$)-1, m, (i, a, b) -> ($type$)(a & b)));
            case VECTOR_OP_OR: return (v, m) ->
                    toBits(v.rOp(($type$)0, m, (i, a, b) -> ($type$)(a | b)));
            case VECTOR_OP_XOR: return (v, m) ->
                    toBits(v.rOp(($type$)0, m, (i, a, b) -> ($type$)(a ^ b)));
#end[BITWISE]
            default: return null;
        }
    }

    // Extreme values of the element type: the infinities for floating-point
    // lanes, MIN_VALUE/MAX_VALUE otherwise.  reductionOperations passes them
    // to rOp as starting values, e.g. MAX_OR_INF for VECTOR_OP_MIN and
    // MIN_OR_INF for VECTOR_OP_MAX.
#if[FP]
    private static final $type$ MIN_OR_INF = $Boxtype$.NEGATIVE_INFINITY;
    private static final $type$ MAX_OR_INF = $Boxtype$.POSITIVE_INFINITY;
#else[FP]
    private static final $type$ MIN_OR_INF = $Boxtype$.MIN_VALUE;
    private static final $type$ MAX_OR_INF = $Boxtype$.MAX_VALUE;
#end[FP]

    // Overrides of the inherited reductions that report their result as a
    // long; both stay abstract in this class.
    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op);
    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op,
                                                     VectorMask<$Boxtype$> m);

    // Type specific accessors

    /**
     * Gets the lane element at lane index {@code i}.
     *
     * @param i the lane index
     * @return the lane element at lane index {@code i}
     * @throws IllegalArgumentException if the index is out of range
     * ({@code < 0 || >= length()})
     */
    public abstract $type$ lane(int i);

    /**
     * Replaces the lane element of this vector at lane index {@code i} with
     * value {@code e}.
     *
     * This is a cross-lane operation and behaves as if it returns the result
     * of blending this vector with an input vector that is the result of
     * broadcasting {@code e} and a mask that has only one lane set at lane
     * index {@code i}.
     *
     * @param i the lane index of the lane element to be replaced
     * @param e the value to be placed
     * @return the result of replacing the lane element of this vector at lane
     * index {@code i} with value {@code e}.
     * @throws IllegalArgumentException if the index is out of range
     * ({@code < 0 || >= length()})
     */
    // Vectors are immutable sequences (see the class description), so the
    // replacement is delivered through the returned vector.
    public abstract $abstractvectortype$ withLane(int i, $type$ e);

    // Memory load operations

    /**
     * Returns an array of type {@code $type$[]}
     * containing all the lane values.
     * The array length is the same as the vector length.
     * The array elements are stored in lane order.
     * <p>
     * This method behaves as if it stores
     * this vector into an allocated array
     * (using {@link #intoArray($type$[], int) intoArray})
     * and returns the array as follows:
     * <pre>{@code
     *   $type$[] a = new $type$[this.length()];
     *   this.intoArray(a, 0);
     *   return a;
     * }</pre>
     *
     * @return an array containing the lane values of this vector
     */
    @ForceInline
    @Override
    public final $type$[] toArray() {
        // Allocate one slot per lane and let intoArray fill it from offset 0.
        final int laneCount = vspecies().laneCount();
        final $type$[] lanes = new $type$[laneCount];
        intoArray(lanes, 0);
        return lanes;
    }

#if[int]
    /**
     * {@inheritDoc} <!--workaround-->
     * This is an alias for {@link #toArray()}.
     * When this method is used on vectors
     * of type {@code $abstractvectortype$},
     * there will be no loss of range or precision.
     */
    @ForceInline
    @Override
    public final int[] toIntArray() {
        // Lanes are already ints, so the lane array is returned as is.
        return this.toArray();
    }
#else[int]
    /** {@inheritDoc} <!--workaround-->
#if[!FP]
#if[!long]
     * @implNote
     * When this method is used on vectors
     * of type {@code $abstractvectortype$},
     * there will be no loss of precision or range,
     * and so no {@code UnsupportedOperationException} will
     * be thrown.
#end[!long]
#end[!FP]
     */
    @ForceInline
    @Override
    public final int[] toIntArray() {
        $type$[] lanes = toArray();
        int[] ints = new int[lanes.length];
        int next = 0;
        for ($type$ lane : lanes) {
            // Each lane goes through toIntegralChecked before being narrowed to int.
            ints[next++] = (int) $Type$Species.toIntegralChecked(lane, true);
        }
        return ints;
    }
#end[int]

#if[long]
    /**
     * {@inheritDoc} <!--workaround-->
     * This is an alias for {@link #toArray()}.
     * When this method is used on vectors
     * of type {@code $abstractvectortype$},
     * there will be no loss of range or precision.
     */
    @ForceInline
    @Override
    public final long[] toLongArray() {
        // Lanes are already longs, so the lane array is returned as is.
        return this.toArray();
    }
#else[long]
    /** {@inheritDoc} <!--workaround-->
#if[!FP]
     * @implNote
     * When this method is used on vectors
     * of type {@code $abstractvectortype$},
     * there will be no loss of precision or range,
     * and so no {@code UnsupportedOperationException} will
     * be thrown.
#end[!FP]
     */
    @ForceInline
    @Override
    public final long[] toLongArray() {
        $type$[] lanes = toArray();
        long[] longs = new long[lanes.length];
        int next = 0;
        for ($type$ lane : lanes) {
            // Each lane goes through toIntegralChecked on its way to long.
            longs[next++] = $Type$Species.toIntegralChecked(lane, false);
        }
        return longs;
    }
#end[long]

#if[double]
    /** {@inheritDoc} <!--workaround-->
     * @implNote
     * This is an alias for {@link #toArray()}.
     * When this method is used on vectors
     * of type {@code $abstractvectortype$},
     * there will be no loss of precision.
     */
    @ForceInline
    @Override
    public final double[] toDoubleArray() {
        // Lanes are already double in this specialization, so no conversion is needed.
        return toArray();
    }
#else[double]
    /** {@inheritDoc} <!--workaround-->
#if[long]
     * @implNote
     * When this method is used on vectors
     * of type {@code $abstractvectortype$},
     * up to nine bits of precision may be lost
     * for lane values of large magnitude.
#else[long]
     * @implNote
     * When this method is used on vectors
     * of type {@code $abstractvectortype$},
     * there will be no loss of precision.
#end[long]
     */
    @ForceInline
    @Override
    public final double[] toDoubleArray() {
        // Snapshot the lanes once, then widen/convert each one to double.
        final $type$[] lanes = toArray();
        final int laneCount = lanes.length;
        final double[] doubles = new double[laneCount];
        for (int lane = 0; lane < laneCount; lane++) {
            doubles[lane] = (double) lanes[lane];
        }
        return doubles;
    }
#end[double]

    /**
     * Loads a vector from an array of type {@code $type$[]}
     * starting at an offset.
     * For each vector lane, where {@code N} is the vector lane index, the
     * array element at index {@code offset + N} is placed into the
     * resulting vector at lane index {@code N}.
     *
     * @param species species of desired vector
     * @param a the array
     * @param offset the offset into the array
     * @return the vector loaded from an array
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
     *         for any lane {@code N} in the vector
     */
    @ForceInline
    public static
    $abstractvectortype$ fromArray(VectorSpecies<$Boxtype$> species,
                                   $type$[] a, int offset) {
        // The whole window of species.length() lanes starting at offset must
        // lie inside the array; the range check runs before the species cast.
        final int start = checkFromIndexSize(offset, species.length(), a.length);
        final $Type$Species vsp = ($Type$Species) species;
        return vsp.dummyVector().fromArray0(a, start);
    }

    /**
     * Loads a vector from an array of type {@code $type$[]}
     * starting at an offset and using a mask.
     * Lanes where the mask is unset are filled with the default
     * value of {@code $type$} ({#if[FP]?positive }zero).
     * For each vector lane, where {@code N} is the vector lane index,
     * if the mask lane at index {@code N} is set then the array element at
     * index {@code offset + N} is placed into the resulting vector at lane index
     * {@code N}, otherwise the default element value is placed into the
     * resulting vector at lane index {@code N}.
     *
     * @param species species of desired vector
     * @param a the array
     * @param offset the offset into the array
     * @param m the mask controlling lane selection
     * @return the vector loaded from an array
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
     *         for any lane {@code N} in the vector
     *         where the mask is set
     */
    @ForceInline
    public static
    $abstractvectortype$ fromArray(VectorSpecies<$Boxtype$> species,
                                   $type$[] a, int offset,
                                   VectorMask<$Boxtype$> m) {
        final $Type$Species vsp = ($Type$Species) species;
        final boolean wholeVectorFits =
            VectorIntrinsics.indexInRange(offset, vsp.length(), a.length);
        if (!wholeVectorFits) {
            // Part of the lane window falls outside the array: only the lanes
            // selected by the mask are required to address valid elements.
            final AbstractMask<$Boxtype$> am = (AbstractMask<$Boxtype$>) m;
            am.checkIndexByLane(offset, a.length, vsp.iota(), 1);
            return vsp.dummyVector().fromArray0(a, offset, m, OFFSET_OUT_OF_RANGE);
        }
        // Every lane index is in bounds, so no per-lane check is needed.
        return vsp.dummyVector().fromArray0(a, offset, m, OFFSET_IN_RANGE);
    }

    /**
     * Gathers a new vector composed of elements from an array of type
     * {@code $type$[]},
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
     * The index map is a contiguous sequence of {@code VLENGTH}
     * elements in a second array of {@code int}s, starting at a given
     * {@code mapOffset}.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane is loaded from the array
     * element {@code a[f(N)]}, where {@code f(N)} is the
     * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
     *
     * @param species species of desired vector
     * @param a the array
     * @param offset the offset into the array, may be negative if relative
     * indexes in the index map compensate to produce a value within the
     * array bounds
     * @param indexMap the index map
     * @param mapOffset the offset into the index map
     * @return the vector loaded from the indexed elements of the array
     * @throws IndexOutOfBoundsException
     *         if {@code mapOffset+N < 0}
     *         or if {@code mapOffset+N >= indexMap.length},
     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
     *         is an invalid index into {@code a},
     *         for any lane {@code N} in the vector
     * @see $abstractvectortype$#toIntArray()
     */
#if[byteOrShort]
    // Gather variant generated for byte and short lanes (#if[byteOrShort]).
    // The indexes are validated chunk by chunk below, and loadWithMap is
    // called with null in the index-vector positions.
    @ForceInline
    public static
    $abstractvectortype$ fromArray(VectorSpecies<$Boxtype$> species,
                                   $type$[] a, int offset,
                                   int[] indexMap, int mapOffset) {
        $Type$Species vsp = ($Type$Species) species;
        IntVector.IntSpecies isp = IntVector.species(vsp.indexShape());
        Objects.requireNonNull(a);
        Objects.requireNonNull(indexMap);
        Class<? extends $abstractvectortype$> vectorType = vsp.vectorType();


        // Constant folding should sweep out following conditional logic.
        // lsp is the int species used to walk the index map: the index species
        // itself, capped at the preferred int species when that is shorter.
        VectorSpecies<Integer> lsp;
        if (isp.length() > IntVector.SPECIES_PREFERRED.length()) {
            lsp = IntVector.SPECIES_PREFERRED;
        } else {
            lsp = isp;
        }

        // Check indices are within array bounds.
        // Each iteration loads lsp.length() map entries, adds the base offset,
        // and validates the resulting indexes against a.length.
        for (int i = 0; i < vsp.length(); i += lsp.length()) {
            IntVector vix = IntVector
                .fromArray(lsp, indexMap, mapOffset + i)
                .add(offset);
            VectorIntrinsics.checkIndex(vix, a.length);
        }

        // The lambda spells out the gather in plain Java:
        // lane n reads c[idx + iMap[idy + n]].
        return VectorSupport.loadWithMap(
            vectorType, null, $type$.class, vsp.laneCount(),
            lsp.vectorType(),
            a, ARRAY_BASE, null, null,
            a, offset, indexMap, mapOffset, vsp,
            (c, idx, iMap, idy, s, vm) ->
            s.vOp(n -> c[idx + iMap[idy+n]]));
    }
#else[byteOrShort]
    // Gather variant generated for lane types other than byte and short
    // (#else[byteOrShort]). It builds an int index vector from the index map,
    // bounds-checks it, and passes it to loadWithMap.
    @ForceInline
    public static
    $abstractvectortype$ fromArray(VectorSpecies<$Boxtype$> species,
                                   $type$[] a, int offset,
                                   int[] indexMap, int mapOffset) {
        $Type$Species vsp = ($Type$Species) species;
        IntVector.IntSpecies isp = IntVector.species(vsp.indexShape());
        Objects.requireNonNull(a);
        Objects.requireNonNull(indexMap);
        Class<? extends $abstractvectortype$> vectorType = vsp.vectorType();

#if[longOrDouble]
        // A single-lane vector needs exactly one index, so use the plain
        // contiguous load at that computed position.
        if (vsp.laneCount() == 1) {
          return $abstractvectortype$.fromArray(vsp, a, offset + indexMap[mapOffset]);
        }

        // Index vector: vix[0:n] = k -> offset + indexMap[mapOffset + k]
        IntVector vix;
        if (isp.laneCount() != vsp.laneCount()) {
            // For $Type$MaxVector,  if vector length is non-power-of-two or
            // 2048 bits, indexShape of $Type$ species is S_MAX_BIT.
            // Assume that vector length is 2048, then the lane count of $Type$
            // vector is 32. When converting $Type$ species to int species,
            // indexShape is still S_MAX_BIT, but the lane count of int vector
            // is 64. So when loading index vector (IntVector), only lower half
            // of index data is needed.
            vix = IntVector
                .fromArray(isp, indexMap, mapOffset, IntMaxVector.IntMaxMask.LOWER_HALF_TRUE_MASK)
                .add(offset);
        } else {
            vix = IntVector
                .fromArray(isp, indexMap, mapOffset)
                .add(offset);
        }
#else[longOrDouble]
        // Index vector: vix[0:n] = k -> offset + indexMap[mapOffset + k]
        IntVector vix = IntVector
            .fromArray(isp, indexMap, mapOffset)
            .add(offset);
#end[longOrDouble]

        // The value returned by checkIndex replaces vix, so the gather below
        // receives the checked index vector.
        vix = VectorIntrinsics.checkIndex(vix, a.length);

        // The lambda spells out the gather in plain Java:
        // lane n reads c[idx + iMap[idy + n]].
        return VectorSupport.loadWithMap(
            vectorType, null, $type$.class, vsp.laneCount(),
            isp.vectorType(),
            a, ARRAY_BASE, vix, null,
            a, offset, indexMap, mapOffset, vsp,
            (c, idx, iMap, idy, s, vm) ->
            s.vOp(n -> c[idx + iMap[idy+n]]));
    }
#end[byteOrShort]

    /**
     * Gathers a new vector composed of elements from an array of type
     * {@code $type$[]},
     * under the control of a mask, and
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
     * The index map is a contiguous sequence of {@code VLENGTH}
     * elements in a second array of {@code int}s, starting at a given
     * {@code mapOffset}.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * if the lane is set in the mask,
     * the lane is loaded from the array
     * element {@code a[f(N)]}, where {@code f(N)} is the
     * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
     * Unset lanes in the resulting vector are set to zero.
     *
     * @param species species of desired vector
     * @param a the array
     * @param offset the offset into the array, may be negative if relative
     * indexes in the index map compensate to produce a value within the
     * array bounds
     * @param indexMap the index map
     * @param mapOffset the offset into the index map
     * @param m the mask controlling lane selection
     * @return the vector loaded from the indexed elements of the array
     * @throws IndexOutOfBoundsException
     *         if {@code mapOffset+N < 0}
     *         or if {@code mapOffset+N >= indexMap.length},
     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
     *         is an invalid index into {@code a},
     *         for any lane {@code N} in the vector
     *         where the mask is set
     * @see $abstractvectortype$#toIntArray()
     */
    @ForceInline
    public static
    $abstractvectortype$ fromArray(VectorSpecies<$Boxtype$> species,
                                   $type$[] a, int offset,
                                   int[] indexMap, int mapOffset,
                                   VectorMask<$Boxtype$> m) {
        if (!m.allTrue()) {
            // At least one lane is unset: take the masked gather path.
            final $Type$Species vsp = ($Type$Species) species;
            return vsp.dummyVector().fromArray0(a, offset, indexMap, mapOffset, m);
        }
        // Every lane is selected, so delegate to the unmasked gather.
        return fromArray(species, a, offset, indexMap, mapOffset);
    }

#if[short]
    /**
     * Loads a vector from an array of type {@code char[]}
     * starting at an offset.
     * For each vector lane, where {@code N} is the vector lane index, the
     * array element at index {@code offset + N}
     * is first cast to a {@code short} value and then
     * placed into the resulting vector at lane index {@code N}.
     *
     * @param species species of desired vector
     * @param a the array
     * @param offset the offset into the array
     * @return the vector loaded from an array
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
     *         for any lane {@code N} in the vector
     */
    @ForceInline
    public static
    $abstractvectortype$ fromCharArray(VectorSpecies<$Boxtype$> species,
                                       char[] a, int offset) {
        // The whole window of species.length() lanes starting at offset must
        // lie inside the array; the range check runs before the species cast.
        final int start = checkFromIndexSize(offset, species.length(), a.length);
        final $Type$Species vsp = ($Type$Species) species;
        return vsp.dummyVector().fromCharArray0(a, start);
    }

    /**
     * Loads a vector from an array of type {@code char[]}
     * starting at an offset and using a mask.
     * Lanes where the mask is unset are filled with the default
     * value of {@code $type$} ({#if[FP]?positive }zero).
     * For each vector lane, where {@code N} is the vector lane index,
     * if the mask lane at index {@code N} is set then the array element at
     * index {@code offset + N}
     * is first cast to a {@code short} value and then
     * placed into the resulting vector at lane index
     * {@code N}, otherwise the default element value is placed into the
     * resulting vector at lane index {@code N}.
     *
     * @param species species of desired vector
     * @param a the array
     * @param offset the offset into the array
     * @param m the mask controlling lane selection
     * @return the vector loaded from an array
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
     *         for any lane {@code N} in the vector
     *         where the mask is set
     */
    @ForceInline
    public static
    $abstractvectortype$ fromCharArray(VectorSpecies<$Boxtype$> species,
                                       char[] a, int offset,
                                       VectorMask<$Boxtype$> m) {
        final $Type$Species vsp = ($Type$Species) species;
        final boolean wholeVectorFits =
            VectorIntrinsics.indexInRange(offset, vsp.length(), a.length);
        if (!wholeVectorFits) {
            // Part of the lane window falls outside the array: only the lanes
            // selected by the mask are required to address valid elements.
            final AbstractMask<$Boxtype$> am = (AbstractMask<$Boxtype$>) m;
            am.checkIndexByLane(offset, a.length, vsp.iota(), 1);
            return vsp.dummyVector().fromCharArray0(a, offset, m, OFFSET_OUT_OF_RANGE);
        }
        // Every lane index is in bounds, so no per-lane check is needed.
        return vsp.dummyVector().fromCharArray0(a, offset, m, OFFSET_IN_RANGE);
    }

    /**
     * Gathers a new vector composed of elements from an array of type
     * {@code char[]},
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
     * The index map is a contiguous sequence of {@code VLENGTH}
     * elements in a second array of {@code int}s, starting at a given
     * {@code mapOffset}.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane is loaded from the expression
     * {@code (short) a[f(N)]}, where {@code f(N)} is the
     * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
     *
     * @param species species of desired vector
     * @param a the array
     * @param offset the offset into the array, may be negative if relative
     * indexes in the index map compensate to produce a value within the
     * array bounds
     * @param indexMap the index map
     * @param mapOffset the offset into the index map
     * @return the vector loaded from the indexed elements of the array
     * @throws IndexOutOfBoundsException
     *         if {@code mapOffset+N < 0}
     *         or if {@code mapOffset+N >= indexMap.length},
     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
     *         is an invalid index into {@code a},
     *         for any lane {@code N} in the vector
     * @see $abstractvectortype$#toIntArray()
     */
    @ForceInline
    public static
    $abstractvectortype$ fromCharArray(VectorSpecies<$Boxtype$> species,
                                       char[] a, int offset,
                                       int[] indexMap, int mapOffset) {
        // FIXME: optimize
        final $Type$Species vsp = ($Type$Species) species;
        // Scalar gather: each lane resolves its own index through the map.
        return vsp.vOp(lane -> {
            final int index = offset + indexMap[mapOffset + lane];
            return (short) a[index];
        });
    }

    /**
     * Gathers a new vector composed of elements from an array of type
     * {@code char[]},
     * under the control of a mask, and
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
     * The index map is a contiguous sequence of {@code VLENGTH}
     * elements in a second array of {@code int}s, starting at a given
     * {@code mapOffset}.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * if the lane is set in the mask,
     * the lane is loaded from the expression
     * {@code (short) a[f(N)]}, where {@code f(N)} is the
     * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
     * Unset lanes in the resulting vector are set to zero.
     *
     * @param species species of desired vector
     * @param a the array
     * @param offset the offset into the array, may be negative if relative
     * indexes in the index map compensate to produce a value within the
     * array bounds
     * @param indexMap the index map
     * @param mapOffset the offset into the index map
     * @param m the mask controlling lane selection
     * @return the vector loaded from the indexed elements of the array
     * @throws IndexOutOfBoundsException
     *         if {@code mapOffset+N < 0}
     *         or if {@code mapOffset+N >= indexMap.length},
     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
     *         is an invalid index into {@code a},
     *         for any lane {@code N} in the vector
     *         where the mask is set
     * @see $abstractvectortype$#toIntArray()
     */
    @ForceInline
    public static
    $abstractvectortype$ fromCharArray(VectorSpecies<$Boxtype$> species,
                                       char[] a, int offset,
                                       int[] indexMap, int mapOffset,
                                       VectorMask<$Boxtype$> m) {
        // FIXME: optimize
        final $Type$Species vsp = ($Type$Species) species;
        // Scalar gather under mask control: the per-lane function resolves
        // its own index through the map.
        return vsp.vOp(m, lane -> {
            final int index = offset + indexMap[mapOffset + lane];
            return (short) a[index];
        });
    }
#end[short]

#if[byte]
    /**
     * Loads a vector from an array of type {@code boolean[]}
     * starting at an offset.
     * For each vector lane, where {@code N} is the vector lane index, the
     * array element at index {@code offset + N}
     * is first converted to a {@code byte} value and then
     * placed into the resulting vector at lane index {@code N}.
     * <p>
     * A {@code boolean} value is converted to a {@code byte} value by applying the
     * expression {@code (byte) (b ? 1 : 0)}, where {@code b} is the {@code boolean} value.
     *
     * @param species species of desired vector
     * @param a the array
     * @param offset the offset into the array
     * @return the vector loaded from an array
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
     *         for any lane {@code N} in the vector
     */
    @ForceInline
    public static
    $abstractvectortype$ fromBooleanArray(VectorSpecies<$Boxtype$> species,
                                          boolean[] a, int offset) {
        // The whole window of species.length() lanes starting at offset must
        // lie inside the array; the range check runs before the species cast.
        final int start = checkFromIndexSize(offset, species.length(), a.length);
        final $Type$Species vsp = ($Type$Species) species;
        return vsp.dummyVector().fromBooleanArray0(a, start);
    }

    /**
     * Loads a vector from an array of type {@code boolean[]}
     * starting at an offset and using a mask.
     * Lanes where the mask is unset are filled with the default
     * value of {@code $type$} ({#if[FP]?positive }zero).
     * For each vector lane, where {@code N} is the vector lane index,
     * if the mask lane at index {@code N} is set then the array element at
     * index {@code offset + N}
     * is first converted to a {@code byte} value and then
     * placed into the resulting vector at lane index
     * {@code N}, otherwise the default element value is placed into the
     * resulting vector at lane index {@code N}.
     * <p>
     * A {@code boolean} value is converted to a {@code byte} value by applying the
     * expression {@code (byte) (b ? 1 : 0)}, where {@code b} is the {@code boolean} value.
     *
     * @param species species of desired vector
     * @param a the array
     * @param offset the offset into the array
     * @param m the mask controlling lane selection
     * @return the vector loaded from an array
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
     *         for any lane {@code N} in the vector
     *         where the mask is set
     */
    @ForceInline
    public static
    $abstractvectortype$ fromBooleanArray(VectorSpecies<$Boxtype$> species,
                                          boolean[] a, int offset,
                                          VectorMask<$Boxtype$> m) {
        $Type$Species vsp = ($Type$Species) species;
        // Fast path: the whole lane window is in bounds, so no per-lane
        // index check is required. (The unused 'zero' local that used to be
        // materialized here has been dropped; the sibling masked loaders
        // never had it.)
        if (VectorIntrinsics.indexInRange(offset, vsp.length(), a.length)) {
            return vsp.dummyVector().fromBooleanArray0(a, offset, m, OFFSET_IN_RANGE);
        }

        // Part of the lane window falls outside the array: only the lanes
        // selected by the mask are required to address valid elements.
        ((AbstractMask<$Boxtype$>)m)
            .checkIndexByLane(offset, a.length, vsp.iota(), 1);
        return vsp.dummyVector().fromBooleanArray0(a, offset, m, OFFSET_OUT_OF_RANGE);
    }

    /**
     * Gathers a new vector composed of elements from an array of type
     * {@code boolean[]},
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
     * The index map is a contiguous sequence of {@code VLENGTH}
     * elements in a second array of {@code int}s, starting at a given
     * {@code mapOffset}.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane is loaded from the expression
     * {@code (byte) (a[f(N)] ? 1 : 0)}, where {@code f(N)} is the
     * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
     *
     * @param species species of desired vector
     * @param a the array
     * @param offset the offset into the array, may be negative if relative
     * indexes in the index map compensate to produce a value within the
     * array bounds
     * @param indexMap the index map
     * @param mapOffset the offset into the index map
     * @return the vector loaded from the indexed elements of the array
     * @throws IndexOutOfBoundsException
     *         if {@code mapOffset+N < 0}
     *         or if {@code mapOffset+N >= indexMap.length},
     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
     *         is an invalid index into {@code a},
     *         for any lane {@code N} in the vector
     * @see $abstractvectortype$#toIntArray()
     */
    @ForceInline
    public static
    $abstractvectortype$ fromBooleanArray(VectorSpecies<$Boxtype$> species,
                                          boolean[] a, int offset,
                                          int[] indexMap, int mapOffset) {
        // FIXME: optimize
        final $Type$Species vsp = ($Type$Species) species;
        // Scalar gather: each lane resolves its own index through the map and
        // encodes the boolean as 1 (true) or 0 (false).
        return vsp.vOp(lane -> {
            final boolean flag = a[offset + indexMap[mapOffset + lane]];
            return (byte) (flag ? 1 : 0);
        });
    }

    /**
     * Gathers a new vector composed of elements from an array of type
     * {@code boolean[]},
     * under the control of a mask, and
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
     * The index map is a contiguous sequence of {@code VLENGTH}
     * elements in a second array of {@code int}s, starting at a given
     * {@code mapOffset}.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * if the lane is set in the mask,
     * the lane is loaded from the expression
     * {@code (byte) (a[f(N)] ? 1 : 0)}, where {@code f(N)} is the
     * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
     * Unset lanes in the resulting vector are set to zero.
     *
     * @param species species of desired vector
     * @param a the array
     * @param offset the offset into the array, may be negative if relative
     * indexes in the index map compensate to produce a value within the
     * array bounds
     * @param indexMap the index map
     * @param mapOffset the offset into the index map
     * @param m the mask controlling lane selection
     * @return the vector loaded from the indexed elements of the array
     * @throws IndexOutOfBoundsException
     *         if {@code mapOffset+N < 0}
     *         or if {@code mapOffset+N >= indexMap.length},
     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
     *         is an invalid index into {@code a},
     *         for any lane {@code N} in the vector
     *         where the mask is set
     * @see $abstractvectortype$#toIntArray()
     */
    @ForceInline
    public static
    $abstractvectortype$ fromBooleanArray(VectorSpecies<$Boxtype$> species,
                                          boolean[] a, int offset,
                                          int[] indexMap, int mapOffset,
                                          VectorMask<$Boxtype$> m) {
        // FIXME: optimize
        final $Type$Species vsp = ($Type$Species) species;
        // Scalar gather under mask control: the per-lane function resolves its
        // own index through the map and encodes the boolean as 1 or 0.
        return vsp.vOp(m, lane -> {
            final boolean flag = a[offset + indexMap[mapOffset + lane]];
            return (byte) (flag ? 1 : 0);
        });
    }
#end[byte]

    /**
     * Loads a vector from a {@linkplain MemorySegment memory segment}
     * starting at an offset into the memory segment.
     * Bytes are composed into primitive lane elements according
     * to the specified byte order.
     * The vector is arranged into lanes according to
     * <a href="Vector.html#lane-order">memory ordering</a>.
     * <p>
     * This method behaves as if it returns the result of calling
     * {@link #fromMemorySegment(VectorSpecies,MemorySegment,long,ByteOrder,VectorMask)
     * fromMemorySegment()} as follows:
     * <pre>{@code
     * var m = species.maskAll(true);
     * return fromMemorySegment(species, ms, offset, bo, m);
     * }</pre>
     *
     * @param species species of desired vector
     * @param ms the memory segment
     * @param offset the offset into the memory segment
     * @param bo the intended byte order
     * @return a vector loaded from the memory segment
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N*$sizeInBytes$ < 0}
     *         or {@code offset+N*$sizeInBytes$ >= ms.byteSize()}
     *         for any lane {@code N} in the vector
     * @throws IllegalStateException if the memory segment's session is not alive,
     *         or if access occurs from a thread other than the thread owning the session.
     * @since 19
     */
    @ForceInline
    public static
    $abstractvectortype$ fromMemorySegment(VectorSpecies<$Boxtype$> species,
                                           MemorySegment ms, long offset,
                                           ByteOrder bo) {
        // All species.vectorByteSize() bytes starting at offset must lie
        // inside the segment; the range check runs before the species cast.
        final long start =
            checkFromIndexSize(offset, species.vectorByteSize(), ms.byteSize());
        final $Type$Species vsp = ($Type$Species) species;
        // Load first, then let maybeSwap account for the requested byte order.
        return vsp.dummyVector()
                  .fromMemorySegment0(ms, start)
                  .maybeSwap(bo);
    }

    /**
     * Loads a vector from a {@linkplain MemorySegment memory segment}
     * starting at an offset into the memory segment
     * and using a mask.
     * Lanes where the mask is unset are filled with the default
     * value of {@code $type$} ({#if[FP]?positive }zero).
     * Bytes are composed into primitive lane elements according
     * to the specified byte order.
     * The vector is arranged into lanes according to
     * <a href="Vector.html#lane-order">memory ordering</a>.
     * <p>
     * The following pseudocode illustrates the behavior:
     * <pre>{@code
     * var slice = ms.asSlice(offset);
     * $type$[] ar = new $type$[species.length()];
     * for (int n = 0; n < ar.length; n++) {
     *     if (m.laneIsSet(n)) {
     *         ar[n] = slice.getAtIndex(ValueLayout.JAVA_$TYPE$.withByteAlignment(1), n);
     *     }
     * }
     * $abstractvectortype$ r = $abstractvectortype$.fromArray(species, ar, 0);
     * }</pre>
     * @implNote
#if[!byte]
     * This operation is likely to be more efficient if
     * the specified byte order is the same as
     * {@linkplain ByteOrder#nativeOrder()
     * the platform native order},
     * since this method will not need to reorder
     * the bytes of lane values.
#else[!byte]
     * The byte order argument is ignored.
#end[!byte]
     *
     * @param species species of desired vector
     * @param ms the memory segment
     * @param offset the offset into the memory segment
     * @param bo the intended byte order
     * @param m the mask controlling lane selection
     * @return a vector loaded from the memory segment
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N*$sizeInBytes$ < 0}
     *         or {@code offset+N*$sizeInBytes$ >= ms.byteSize()}
     *         for any lane {@code N} in the vector
     *         where the mask is set
     * @throws IllegalStateException if the memory segment's session is not alive,
     *         or if access occurs from a thread other than the thread owning the session.
     * @since 19
     */
    @ForceInline
    public static
    $abstractvectortype$ fromMemorySegment(VectorSpecies<$Boxtype$> species,
                                           MemorySegment ms, long offset,
                                           ByteOrder bo,
                                           VectorMask<$Boxtype$> m) {
        final $Type$Species vsp = ($Type$Species) species;
        final boolean wholeVectorFits =
            VectorIntrinsics.indexInRange(offset, vsp.vectorByteSize(), ms.byteSize());
        if (!wholeVectorFits) {
            // Part of the vector falls outside the segment: only the lanes
            // selected by the mask are required to address valid bytes. Lane
            // positions are scaled by the element size in bytes.
            final AbstractMask<$Boxtype$> am = (AbstractMask<$Boxtype$>) m;
            am.checkIndexByLane(offset, ms.byteSize(), vsp.iota(), $sizeInBytes$);
            return vsp.dummyVector().fromMemorySegment0(ms, offset, m, OFFSET_OUT_OF_RANGE).maybeSwap(bo);
        }
        // Every lane is in bounds, so no per-lane check is needed.
        return vsp.dummyVector().fromMemorySegment0(ms, offset, m, OFFSET_IN_RANGE).maybeSwap(bo);
    }

    // Memory store operations

    /**
     * Stores this vector into an array of type {@code $type$[]}
     * starting at an offset.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane element at index {@code N} is stored into the array
     * element {@code a[offset+N]}.
     *
     * @param a the array, of type {@code $type$[]}
     * @param offset the offset into the array
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
     *         for any lane {@code N} in the vector
     */
    @ForceInline
    public final
    void intoArray($type$[] a, int offset) {
        // Range-check the window of length() lanes starting at offset before
        // anything is written; the checked value replaces offset.
        offset = checkFromIndexSize(offset, length(), a.length);
        $Type$Species vsp = vspecies();
        VectorSupport.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset), false,
            this,
            a, offset,
            // The lambda spells out the store in plain Java:
            // lane i is written to arr[off + i].
            (arr, off, v)
            -> v.stOp(arr, (int) off,
                      (arr_, off_, i, e) -> arr_[off_ + i] = e));
    }

    /**
     * Stores this vector into an array of type {@code $type$[]}
     * starting at offset and using a mask.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane element at index {@code N} is stored into the array
     * element {@code a[offset+N]}.
     * If the mask lane at {@code N} is unset then the corresponding
     * array element {@code a[offset+N]} is left unchanged.
     * <p>
     * Array range checking is done for lanes where the mask is set.
     * Lanes where the mask is unset are not stored and do not need
     * to correspond to legitimate elements of {@code a}.
     * That is, unset lanes may correspond to array indexes less than
     * zero or beyond the end of the array.
     *
     * @param a the array, of type {@code $type$[]}
     * @param offset the offset into the array
     * @param m the mask controlling lane storage
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
     *         for any lane {@code N} in the vector
     *         where the mask is set
     */
    @ForceInline
    public final
    void intoArray($type$[] a, int offset,
                   VectorMask<$Boxtype$> m) {
        if (m.allTrue()) {
            // Every lane is selected, so delegate to the unmasked store.
            intoArray(a, offset);
            return;
        }
        final $Type$Species vsp = vspecies();
        final boolean wholeVectorFits =
            VectorIntrinsics.indexInRange(offset, vsp.length(), a.length);
        if (!wholeVectorFits) {
            // Part of the lane window falls outside the array: only the lanes
            // selected by the mask are required to address valid elements.
            final AbstractMask<$Boxtype$> am = (AbstractMask<$Boxtype$>) m;
            am.checkIndexByLane(offset, a.length, vsp.iota(), 1);
        }
        intoArray0(a, offset, m);
    }

    /**
     * Scatters this vector into an array of type {@code $type$[]}
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
     * The index map is a contiguous sequence of {@code VLENGTH}
     * elements in a second array of {@code int}s, starting at a given
     * {@code mapOffset}.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane element at index {@code N} is stored into the array
     * element {@code a[f(N)]}, where {@code f(N)} is the
     * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
     *
     * @param a the array
     * @param offset an offset to combine with the index map offsets
     * @param indexMap the index map
     * @param mapOffset the offset into the index map
     * @throws IndexOutOfBoundsException
     *         if {@code mapOffset+N < 0}
     *         or if {@code mapOffset+N >= indexMap.length},
     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
     *         is an invalid index into {@code a},
     *         for any lane {@code N} in the vector
     * @see $abstractvectortype$#toIntArray()
     */
#if[byteOrShort]
    @ForceInline
    public final
    void intoArray($type$[] a, int offset,
                   int[] indexMap, int mapOffset) {
        // Scalar scatter: each lane value is written to the array slot
        // selected by its entry in the index map.
        stOp(a, offset,
             (dst, base, lane, value) ->
                 dst[base + indexMap[mapOffset + lane]] = value);
    }
#else[byteOrShort]
    @ForceInline
    public final
    void intoArray($type$[] a, int offset,
                   int[] indexMap, int mapOffset) {
        // Unmasked scatter: lane N is stored to a[offset + indexMap[mapOffset + N]].
        // The target indexes are built as an IntVector and passed to
        // VectorSupport.storeWithMap, together with a lambda that expresses
        // the same scatter lane by lane through stOp.
        $Type$Species vsp = vspecies();
        // Int species that has this species' index shape.
        IntVector.IntSpecies isp = IntVector.species(vsp.indexShape());
#if[longOrDouble]
        if (vsp.laneCount() == 1) {
            // Single-lane vector: the scatter is just a contiguous store
            // at the one mapped index.
            intoArray(a, offset + indexMap[mapOffset]);
            return;
        }

        // Index vector: vix[0:n] = i -> offset + indexMap[mo + i]
        IntVector vix;
        if (isp.laneCount() != vsp.laneCount()) {
            // For $Type$MaxVector, if the vector length is 2048 bits, the
            // indexShape of the $Type$ species is S_MAX_BIT and the lane
            // count of the $Type$ vector is 32. When converting the $Type$
            // species to an int species, indexShape is still S_MAX_BIT, but
            // the lane count of the int vector is 64. So when loading the
            // index vector (IntVector), only the lower half of the index
            // data is needed.
            vix = IntVector
                .fromArray(isp, indexMap, mapOffset, IntMaxVector.IntMaxMask.LOWER_HALF_TRUE_MASK)
                .add(offset);
        } else {
            vix = IntVector
                .fromArray(isp, indexMap, mapOffset)
                .add(offset);
        }

#else[longOrDouble]
        // Index vector: vix[0:n] = i -> offset + indexMap[mo + i]
        IntVector vix = IntVector
            .fromArray(isp, indexMap, mapOffset)
            .add(offset);
#end[longOrDouble]

        // Validate the computed indexes against a.length; no mask is
        // involved here, so the whole index vector is checked.
        vix = VectorIntrinsics.checkIndex(vix, a.length);

        // The mask-class and mask arguments are null: this is the unmasked
        // variant (compare intoArray0Template, which passes both).
        VectorSupport.storeWithMap(
            vsp.vectorType(), null, vsp.elementType(), vsp.laneCount(),
            isp.vectorType(),
            a, arrayAddress(a, 0), vix,
            this, null,
            a, offset, indexMap, mapOffset,
            // Lane-by-lane form of the same scatter.
            (arr, off, v, map, mo, vm)
            -> v.stOp(arr, off,
                      (arr_, off_, i, e) -> {
                          int j = map[mo + i];
                          arr[off + j] = e;
                      }));
    }
#end[byteOrShort]

    /**
     * Scatters this vector into an array of type {@code $type$[]},
     * under the control of a mask, and
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
     * The index map is a contiguous sequence of {@code VLENGTH}
     * elements in a second array of {@code int}s, starting at a given
     * {@code mapOffset}.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * if the mask lane at index {@code N} is set then
     * the lane element at index {@code N} is stored into the array
     * element {@code a[f(N)]}, where {@code f(N)} is the
     * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
     *
     * @param a the array
     * @param offset an offset to combine with the index map offsets
     * @param indexMap the index map
     * @param mapOffset the offset into the index map
     * @param m the mask
     * @throws IndexOutOfBoundsException
     *         if {@code mapOffset+N < 0}
     *         or if {@code mapOffset+N >= indexMap.length},
     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
     *         is an invalid index into {@code a},
     *         for any lane {@code N} in the vector
     *         where the mask is set
     * @see $abstractvectortype$#toIntArray()
     */
#if[byteOrShort]
    @ForceInline
    public final
    void intoArray($type$[] a, int offset,
                   int[] indexMap, int mapOffset,
                   VectorMask<$Boxtype$> m) {
        // Scalar scatter under mask control: each lane value is written
        // to the array slot selected by its entry in the index map.
        stOp(a, offset, m,
             (dst, base, lane, value) ->
                 dst[base + indexMap[mapOffset + lane]] = value);
    }
#else[byteOrShort]
    @ForceInline
    public final
    void intoArray($type$[] a, int offset,
                   int[] indexMap, int mapOffset,
                   VectorMask<$Boxtype$> m) {
        // A fully-set mask is handled by the unmasked scatter.
        if (m.allTrue()) {
            intoArray(a, offset, indexMap, mapOffset);
            return;
        }
        // Otherwise hand off to the species-specific masked scatter.
        intoArray0(a, offset, indexMap, mapOffset, m);
    }
#end[byteOrShort]

#if[short]
    /**
     * Stores this vector into an array of type {@code char[]}
     * starting at an offset.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane element at index {@code N}
     * is first cast to a {@code char} value and then
     * stored into the array element {@code a[offset+N]}.
     *
     * @param a the array, of type {@code char[]}
     * @param offset the offset into the array
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
     *         for any lane {@code N} in the vector
     */
    @ForceInline
    public final
    void intoCharArray(char[] a, int offset) {
        // Check the whole lane range (length() elements starting at offset)
        // against a.length up front; the value returned by the check is
        // what the rest of the method uses as the offset.
        offset = checkFromIndexSize(offset, length(), a.length);
        $Type$Species vsp = vspecies();
        VectorSupport.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, charArrayAddress(a, offset), false,
            this,
            a, offset,
            // Lane-by-lane form of the same store: each lane is cast to char.
            (arr, off, v)
            -> v.stOp(arr, (int) off,
                      (arr_, off_, i, e) -> arr_[off_ + i] = (char) e));
    }

    /**
     * Stores this vector into an array of type {@code char[]}
     * starting at offset and using a mask.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane element at index {@code N}
     * is first cast to a {@code char} value and then
     * stored into the array element {@code a[offset+N]}.
     * If the mask lane at {@code N} is unset then the corresponding
     * array element {@code a[offset+N]} is left unchanged.
     * <p>
     * Array range checking is done for lanes where the mask is set.
     * Lanes where the mask is unset are not stored and do not need
     * to correspond to legitimate elements of {@code a}.
     * That is, unset lanes may correspond to array indexes less than
     * zero or beyond the end of the array.
     *
     * @param a the array, of type {@code char[]}
     * @param offset the offset into the array
     * @param m the mask controlling lane storage
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
     *         for any lane {@code N} in the vector
     *         where the mask is set
     */
    @ForceInline
    public final
    void intoCharArray(char[] a, int offset,
                       VectorMask<$Boxtype$> m) {
        // A fully-set mask is handled by the unmasked contiguous store.
        if (m.allTrue()) {
            intoCharArray(a, offset);
            return;
        }
        $Type$Species vspec = vspecies();
        boolean wholeVectorInRange =
            VectorIntrinsics.indexInRange(offset, vspec.length(), a.length);
        if (!wholeVectorInRange) {
            // The full lane range does not fit in the array, so fall back
            // to a per-lane index check that takes the mask into account.
            AbstractMask<$Boxtype$> am = (AbstractMask<$Boxtype$>) m;
            am.checkIndexByLane(offset, a.length, vspec.iota(), 1);
        }
        intoCharArray0(a, offset, m);
    }

    /**
     * Scatters this vector into an array of type {@code char[]}
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
     * The index map is a contiguous sequence of {@code VLENGTH}
     * elements in a second array of {@code int}s, starting at a given
     * {@code mapOffset}.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane element at index {@code N}
     * is first cast to a {@code char} value and then
     * stored into the array
     * element {@code a[f(N)]}, where {@code f(N)} is the
     * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
     *
     * @param a the array
     * @param offset an offset to combine with the index map offsets
     * @param indexMap the index map
     * @param mapOffset the offset into the index map
     * @throws IndexOutOfBoundsException
     *         if {@code mapOffset+N < 0}
     *         or if {@code mapOffset+N >= indexMap.length},
     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
     *         is an invalid index into {@code a},
     *         for any lane {@code N} in the vector
     * @see $abstractvectortype$#toIntArray()
     */
    @ForceInline
    public final
    void intoCharArray(char[] a, int offset,
                       int[] indexMap, int mapOffset) {
        // FIXME: optimize
        // Scalar scatter: each lane value is cast to char and written to
        // the array slot selected by its entry in the index map.
        stOp(a, offset,
             (dst, base, lane, value) ->
                 dst[base + indexMap[mapOffset + lane]] = (char) value);
    }

    /**
     * Scatters this vector into an array of type {@code char[]},
     * under the control of a mask, and
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
     * The index map is a contiguous sequence of {@code VLENGTH}
     * elements in a second array of {@code int}s, starting at a given
     * {@code mapOffset}.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * if the mask lane at index {@code N} is set then
     * the lane element at index {@code N}
     * is first cast to a {@code char} value and then
     * stored into the array
     * element {@code a[f(N)]}, where {@code f(N)} is the
     * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
     *
     * @param a the array
     * @param offset an offset to combine with the index map offsets
     * @param indexMap the index map
     * @param mapOffset the offset into the index map
     * @param m the mask
     * @throws IndexOutOfBoundsException
     *         if {@code mapOffset+N < 0}
     *         or if {@code mapOffset+N >= indexMap.length},
     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
     *         is an invalid index into {@code a},
     *         for any lane {@code N} in the vector
     *         where the mask is set
     * @see $abstractvectortype$#toIntArray()
     */
    @ForceInline
    public final
    void intoCharArray(char[] a, int offset,
                       int[] indexMap, int mapOffset,
                       VectorMask<$Boxtype$> m) {
        // FIXME: optimize
        // Scalar scatter under mask control: each lane value is cast to
        // char and written to the slot selected by its index-map entry.
        stOp(a, offset, m,
             (dst, base, lane, value) ->
                 dst[base + indexMap[mapOffset + lane]] = (char) value);
    }
#end[short]

#if[byte]
    /**
     * Stores this vector into an array of type {@code boolean[]}
     * starting at an offset.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane element at index {@code N}
     * is first converted to a {@code boolean} value and then
     * stored into the array element {@code a[offset+N]}.
     * <p>
     * A {@code byte} value is converted to a {@code boolean} value by applying the
     * expression {@code (b & 1) != 0} where {@code b} is the byte value.
     *
     * @param a the array
     * @param offset the offset into the array
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
     *         for any lane {@code N} in the vector
     */
    @ForceInline
    public final
    void intoBooleanArray(boolean[] a, int offset) {
        // Check the whole lane range (length() elements starting at offset)
        // against a.length up front; the value returned by the check is
        // what the rest of the method uses as the offset.
        offset = checkFromIndexSize(offset, length(), a.length);
        $Type$Species vsp = vspecies();
        // Keep only the low bit of every lane, so the vector that is handed
        // to the store holds 0 or 1 per lane, matching the documented
        // (b & 1) != 0 conversion.
        ByteVector normalized = this.and((byte) 1);
        VectorSupport.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, booleanArrayAddress(a, offset), false,
            normalized,
            a, offset,
            // Lane-by-lane form of the same store.
            (arr, off, v)
            -> v.stOp(arr, (int) off,
                      (arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0));
    }

    /**
     * Stores this vector into an array of type {@code boolean[]}
     * starting at offset and using a mask.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane element at index {@code N}
     * is first converted to a {@code boolean} value and then
     * stored into the array element {@code a[offset+N]}.
     * If the mask lane at {@code N} is unset then the corresponding
     * array element {@code a[offset+N]} is left unchanged.
     * <p>
     * A {@code byte} value is converted to a {@code boolean} value by applying the
     * expression {@code (b & 1) != 0} where {@code b} is the byte value.
     * <p>
     * Array range checking is done for lanes where the mask is set.
     * Lanes where the mask is unset are not stored and do not need
     * to correspond to legitimate elements of {@code a}.
     * That is, unset lanes may correspond to array indexes less than
     * zero or beyond the end of the array.
     *
     * @param a the array
     * @param offset the offset into the array
     * @param m the mask controlling lane storage
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
     *         for any lane {@code N} in the vector
     *         where the mask is set
     */
    @ForceInline
    public final
    void intoBooleanArray(boolean[] a, int offset,
                          VectorMask<$Boxtype$> m) {
        // A fully-set mask is handled by the unmasked contiguous store.
        if (m.allTrue()) {
            intoBooleanArray(a, offset);
            return;
        }
        $Type$Species vspec = vspecies();
        boolean wholeVectorInRange =
            VectorIntrinsics.indexInRange(offset, vspec.length(), a.length);
        if (!wholeVectorInRange) {
            // The full lane range does not fit in the array, so fall back
            // to a per-lane index check that takes the mask into account.
            AbstractMask<$Boxtype$> am = (AbstractMask<$Boxtype$>) m;
            am.checkIndexByLane(offset, a.length, vspec.iota(), 1);
        }
        intoBooleanArray0(a, offset, m);
    }

    /**
     * Scatters this vector into an array of type {@code boolean[]}
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
     * The index map is a contiguous sequence of {@code VLENGTH}
     * elements in a second array of {@code int}s, starting at a given
     * {@code mapOffset}.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane element at index {@code N}
     * is first converted to a {@code boolean} value and then
     * stored into the array
     * element {@code a[f(N)]}, where {@code f(N)} is the
     * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
     * <p>
     * A {@code byte} value is converted to a {@code boolean} value by applying the
     * expression {@code (b & 1) != 0} where {@code b} is the byte value.
     *
     * @param a the array
     * @param offset an offset to combine with the index map offsets
     * @param indexMap the index map
     * @param mapOffset the offset into the index map
     * @throws IndexOutOfBoundsException
     *         if {@code mapOffset+N < 0}
     *         or if {@code mapOffset+N >= indexMap.length},
     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
     *         is an invalid index into {@code a},
     *         for any lane {@code N} in the vector
     * @see $abstractvectortype$#toIntArray()
     */
    @ForceInline
    public final
    void intoBooleanArray(boolean[] a, int offset,
                          int[] indexMap, int mapOffset) {
        // FIXME: optimize
        // Scalar scatter: the low bit of each lane value decides the
        // boolean written to the slot selected by its index-map entry.
        stOp(a, offset,
             (dst, base, lane, value) ->
                 dst[base + indexMap[mapOffset + lane]] = (value & 1) != 0);
    }

    /**
     * Scatters this vector into an array of type {@code boolean[]},
     * under the control of a mask, and
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
     * The index map is a contiguous sequence of {@code VLENGTH}
     * elements in a second array of {@code int}s, starting at a given
     * {@code mapOffset}.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * if the mask lane at index {@code N} is set then
     * the lane element at index {@code N}
     * is first converted to a {@code boolean} value and then
     * stored into the array
     * element {@code a[f(N)]}, where {@code f(N)} is the
     * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
     * <p>
     * A {@code byte} value is converted to a {@code boolean} value by applying the
     * expression {@code (b & 1) != 0} where {@code b} is the byte value.
     *
     * @param a the array
     * @param offset an offset to combine with the index map offsets
     * @param indexMap the index map
     * @param mapOffset the offset into the index map
     * @param m the mask
     * @throws IndexOutOfBoundsException
     *         if {@code mapOffset+N < 0}
     *         or if {@code mapOffset+N >= indexMap.length},
     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
     *         is an invalid index into {@code a},
     *         for any lane {@code N} in the vector
     *         where the mask is set
     * @see $abstractvectortype$#toIntArray()
     */
    @ForceInline
    public final
    void intoBooleanArray(boolean[] a, int offset,
                          int[] indexMap, int mapOffset,
                          VectorMask<$Boxtype$> m) {
        // FIXME: optimize
        // Scalar scatter under mask control: the low bit of each lane value
        // decides the boolean written to the slot selected by the index map.
        stOp(a, offset, m,
             (dst, base, lane, value) ->
                 dst[base + indexMap[mapOffset + lane]] = (value & 1) != 0);
    }
#end[byte]

    /**
     * {@inheritDoc} <!--workaround-->
     * @since 19
     */
    @Override
    @ForceInline
    public final
    void intoMemorySegment(MemorySegment ms, long offset,
                           ByteOrder bo) {
        // Reject read-only targets before doing any index checking.
        if (ms.isReadOnly()) {
            throw new UnsupportedOperationException("Attempt to write a read-only segment");
        }
        // The whole vector (byteSize() bytes) must fit inside the segment.
        long checkedOffset = checkFromIndexSize(offset, byteSize(), ms.byteSize());
        // Apply the requested byte order, then do the unchecked store.
        maybeSwap(bo).intoMemorySegment0(ms, checkedOffset);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @since 19
     */
    @Override
    @ForceInline
    public final
    void intoMemorySegment(MemorySegment ms, long offset,
                           ByteOrder bo,
                           VectorMask<$Boxtype$> m) {
        // A fully-set mask is handled by the unmasked store, which does
        // its own read-only and bounds checks.
        if (m.allTrue()) {
            intoMemorySegment(ms, offset, bo);
            return;
        }
        if (ms.isReadOnly()) {
            throw new UnsupportedOperationException("Attempt to write a read-only segment");
        }
        $Type$Species vspec = vspecies();
        boolean wholeVectorInRange =
            VectorIntrinsics.indexInRange(offset, vspec.vectorByteSize(), ms.byteSize());
        if (!wholeVectorInRange) {
            // The full vector does not fit in the segment, so fall back to
            // a per-lane check that takes the mask into account; lanes are
            // $sizeInBytes$ bytes apart.
            AbstractMask<$Boxtype$> am = (AbstractMask<$Boxtype$>) m;
            am.checkIndexByLane(offset, ms.byteSize(), vspec.iota(), $sizeInBytes$);
        }
        maybeSwap(bo).intoMemorySegment0(ms, offset, m);
    }

    // ================================================

    // Low-level memory operations.
    //
    // Note that all of these operations *must* inline into a context
    // where the exact species of the involved vector is a
    // compile-time constant.  Otherwise, the intrinsic generation
    // will fail and performance will suffer.
    //
    // In many cases this is achieved by re-deriving a version of the
    // method in each concrete subclass (per species).  The re-derived
    // method simply calls one of these generic methods, with exact
    // parameters for the controlling metadata, which is either a
    // typed vector or constant species instance.

    // Unchecked loading operations in native byte order.
    // Caller is responsible for applying index checks, masking, and
    // byte swapping.

    // Unchecked contiguous load from a $type$[]: no index check is done
    // here; per the section note above, that is the caller's job.
    /*package-private*/
    abstract
    $abstractvectortype$ fromArray0($type$[] a, int offset);
    @ForceInline
    final
    $abstractvectortype$ fromArray0Template($type$[] a, int offset) {
        $Type$Species vsp = vspecies();
        return VectorSupport.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset), false,
            a, offset, vsp,
            // Lane-by-lane form of the same load: lane i reads arr[off + i].
            (arr, off, s) -> s.ldOp(arr, (int) off,
                                    (arr_, off_, i) -> arr_[off_ + i]));
    }

    // Unchecked masked contiguous load from a $type$[]: no index check is
    // done here; per the section note above, that is the caller's job.
    /*package-private*/
    abstract
    $abstractvectortype$ fromArray0($type$[] a, int offset, VectorMask<$Boxtype$> m, int offsetInRange);
    @ForceInline
    final
    <M extends VectorMask<$Boxtype$>>
    $abstractvectortype$ fromArray0Template(Class<M> maskClass, $type$[] a, int offset, M m, int offsetInRange) {
        // NOTE(review): presumably verifies that the mask belongs to this
        // vector's species -- confirm against VectorMask.check.
        m.check(species());
        $Type$Species vsp = vspecies();
        // offsetInRange is passed through to loadMasked unchanged.
        return VectorSupport.loadMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset), false, m, offsetInRange,
            a, offset, vsp,
            // Lane-by-lane form of the same load, under mask vm.
            (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
                                        (arr_, off_, i) -> arr_[off_ + i]));
    }

    // Masked gather from a $type$[] through an index map. The
    // fromArray0Template variants that follow take the same arguments plus
    // the mask class; presumably each concrete subclass implements this
    // method by calling the template (see the note on re-derived methods
    // at the top of this section).
    /*package-private*/
    abstract
    $abstractvectortype$ fromArray0($type$[] a, int offset,
                                    int[] indexMap, int mapOffset,
                                    VectorMask<$Boxtype$> m);
#if[byteOrShort]
    // Masked gather for the byte/short variants:
    // lane n reads a[offset + indexMap[mapOffset + n]] where the mask is set.
    @ForceInline
    final
    <M extends VectorMask<$Boxtype$>>
    $abstractvectortype$ fromArray0Template(Class<M> maskClass, $type$[] a, int offset,
                                            int[] indexMap, int mapOffset, M m) {
        $Type$Species vsp = vspecies();
        // Int species that has this species' index shape.
        IntVector.IntSpecies isp = IntVector.species(vsp.indexShape());
        Objects.requireNonNull(a);
        Objects.requireNonNull(indexMap);
        m.check(vsp);
        Class<? extends $abstractvectortype$> vectorType = vsp.vectorType();


        // Constant folding should sweep out the following conditional logic.
        // Cap the int species used for index handling at SPECIES_PREFERRED.
        VectorSpecies<Integer> lsp;
        if (isp.length() > IntVector.SPECIES_PREFERRED.length()) {
            lsp = IntVector.SPECIES_PREFERRED;
        } else {
            lsp = isp;
        }

        // Check indices are within array bounds.
        // The index map is consumed in chunks of lsp.length() entries; the
        // checked index vector itself is discarded, only the check matters.
        // FIXME: Check index under mask controlling.
        for (int i = 0; i < vsp.length(); i += lsp.length()) {
            IntVector vix = IntVector
                .fromArray(lsp, indexMap, mapOffset + i)
                .add(offset);
            VectorIntrinsics.checkIndex(vix, a.length);
        }

        // No index vector is passed here (null); compare the non-byte/short
        // variant of this template, which passes its checked index vector
        // in the same position.
        return VectorSupport.loadWithMap(
            vectorType, maskClass, $type$.class, vsp.laneCount(),
            lsp.vectorType(),
            a, ARRAY_BASE, null, m,
            a, offset, indexMap, mapOffset, vsp,
            // Lane-by-lane form of the same gather, under mask vm.
            (c, idx, iMap, idy, s, vm) ->
            s.vOp(vm, n -> c[idx + iMap[idy+n]]));
    }
#else[byteOrShort]
    // Masked gather for the non-byte/short variants:
    // lane n reads a[offset + indexMap[mapOffset + n]] where the mask is set.
    @ForceInline
    final
    <M extends VectorMask<$Boxtype$>>
    $abstractvectortype$ fromArray0Template(Class<M> maskClass, $type$[] a, int offset,
                                            int[] indexMap, int mapOffset, M m) {
        $Type$Species vsp = vspecies();
        // Int species that has this species' index shape.
        IntVector.IntSpecies isp = IntVector.species(vsp.indexShape());
        Objects.requireNonNull(a);
        Objects.requireNonNull(indexMap);
        m.check(vsp);
        Class<? extends $abstractvectortype$> vectorType = vsp.vectorType();

#if[longOrDouble]
        if (vsp.laneCount() == 1) {
          // Single-lane vector: the gather is just a masked contiguous load
          // at the one mapped index.
          return $abstractvectortype$.fromArray(vsp, a, offset + indexMap[mapOffset], m);
        }

        // Index vector: vix[0:n] = k -> offset + indexMap[mapOffset + k]
        IntVector vix;
        if (isp.laneCount() != vsp.laneCount()) {
            // For $Type$MaxVector, if the vector length is non-power-of-two
            // or 2048 bits, the indexShape of the $Type$ species is S_MAX_BIT.
            // Assume that the vector length is 2048; then the lane count of
            // the $Type$ vector is 32. When converting the $Type$ species to
            // an int species, indexShape is still S_MAX_BIT, but the lane
            // count of the int vector is 64. So when loading the index vector
            // (IntVector), only the lower half of the index data is needed.
            vix = IntVector
                .fromArray(isp, indexMap, mapOffset, IntMaxVector.IntMaxMask.LOWER_HALF_TRUE_MASK)
                .add(offset);
        } else {
            vix = IntVector
                .fromArray(isp, indexMap, mapOffset)
                .add(offset);
        }
#else[longOrDouble]
        // Index vector: vix[0:n] = k -> offset + indexMap[mapOffset + k]
        IntVector vix = IntVector
            .fromArray(isp, indexMap, mapOffset)
            .add(offset);
#end[longOrDouble]

        // The check below does not take the mask m into account.
        // FIXME: Check index under mask controlling.
        vix = VectorIntrinsics.checkIndex(vix, a.length);

        return VectorSupport.loadWithMap(
            vectorType, maskClass, $type$.class, vsp.laneCount(),
            isp.vectorType(),
            a, ARRAY_BASE, vix, m,
            a, offset, indexMap, mapOffset, vsp,
            // Lane-by-lane form of the same gather, under mask vm.
            (c, idx, iMap, idy, s, vm) ->
            s.vOp(vm, n -> c[idx + iMap[idy+n]]));
    }
#end[byteOrShort]

#if[short]
    // Unchecked contiguous load from a char[]: no index check is done
    // here; per the section note above, that is the caller's job.
    /*package-private*/
    abstract
    $abstractvectortype$ fromCharArray0(char[] a, int offset);
    @ForceInline
    final
    $abstractvectortype$ fromCharArray0Template(char[] a, int offset) {
        $Type$Species vsp = vspecies();
        return VectorSupport.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, charArrayAddress(a, offset), false,
            a, offset, vsp,
            // Lane-by-lane form of the same load: each char is cast to short.
            (arr, off, s) -> s.ldOp(arr, (int) off,
                                    (arr_, off_, i) -> (short) arr_[off_ + i]));
    }

    // Unchecked masked contiguous load from a char[]: no index check is
    // done here; per the section note above, that is the caller's job.
    /*package-private*/
    abstract
    $abstractvectortype$ fromCharArray0(char[] a, int offset, VectorMask<$Boxtype$> m, int offsetInRange);
    @ForceInline
    final
    <M extends VectorMask<$Boxtype$>>
    $abstractvectortype$ fromCharArray0Template(Class<M> maskClass, char[] a, int offset, M m, int offsetInRange) {
        // NOTE(review): presumably verifies that the mask belongs to this
        // vector's species -- confirm against VectorMask.check.
        m.check(species());
        $Type$Species vsp = vspecies();
        // offsetInRange is passed through to loadMasked unchanged.
        return VectorSupport.loadMasked(
                vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
                a, charArrayAddress(a, offset), false, m, offsetInRange,
                a, offset, vsp,
                // Lane-by-lane form of the same load, under mask vm: each
                // char is cast to short.
                (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
                                            (arr_, off_, i) -> (short) arr_[off_ + i]));
    }
#end[short]

#if[byte]
    // Unchecked contiguous load from a boolean[]: no index check is done
    // here; per the section note above, that is the caller's job.
    /*package-private*/
    abstract
    $abstractvectortype$ fromBooleanArray0(boolean[] a, int offset);
    @ForceInline
    final
    $abstractvectortype$ fromBooleanArray0Template(boolean[] a, int offset) {
        $Type$Species vsp = vspecies();
        return VectorSupport.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, booleanArrayAddress(a, offset), false,
            a, offset, vsp,
            // Lane-by-lane form of the same load: true becomes 1, false 0.
            (arr, off, s) -> s.ldOp(arr, (int) off,
                                    (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
    }

    // Unchecked masked contiguous load from a boolean[]: no index check is
    // done here; per the section note above, that is the caller's job.
    /*package-private*/
    abstract
    $abstractvectortype$ fromBooleanArray0(boolean[] a, int offset, VectorMask<$Boxtype$> m, int offsetInRange);
    @ForceInline
    final
    <M extends VectorMask<$Boxtype$>>
    $abstractvectortype$ fromBooleanArray0Template(Class<M> maskClass, boolean[] a, int offset, M m, int offsetInRange) {
        // NOTE(review): presumably verifies that the mask belongs to this
        // vector's species -- confirm against VectorMask.check.
        m.check(species());
        $Type$Species vsp = vspecies();
        // offsetInRange is passed through to loadMasked unchanged.
        return VectorSupport.loadMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, booleanArrayAddress(a, offset), false, m, offsetInRange,
            a, offset, vsp,
            // Lane-by-lane form of the same load, under mask vm: true
            // becomes 1, false 0.
            (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
                                        (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
    }
#end[byte]

    // Unchecked load from a memory segment: no index check or byte swapping
    // is done here; per the section note above, that is the caller's job.
    /*package-private*/
    abstract
    $abstractvectortype$ fromMemorySegment0(MemorySegment ms, long offset);
    @ForceInline
    final
    $abstractvectortype$ fromMemorySegment0Template(MemorySegment ms, long offset) {
        $Type$Species vsp = vspecies();
        return ScopedMemoryAccess.loadFromMemorySegment(
                vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
                (AbstractMemorySegmentImpl) ms, offset, vsp,
                // Lane-by-lane form of the same load, reading each lane
                // through memorySegmentGet.
                (msp, off, s) -> {
                    return s.ldLongOp((MemorySegment) msp, off, $abstractvectortype$::memorySegmentGet);
                });
    }

    // Unchecked masked load from a memory segment: no index check or byte
    // swapping is done here; per the section note above, that is the
    // caller's job.
    abstract
    $abstractvectortype$ fromMemorySegment0(MemorySegment ms, long offset, VectorMask<$Boxtype$> m, int offsetInRange);
    @ForceInline
    final
    <M extends VectorMask<$Boxtype$>>
    $abstractvectortype$ fromMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m, int offsetInRange) {
        $Type$Species vsp = vspecies();
        // NOTE(review): presumably verifies that the mask belongs to this
        // vector's species -- confirm against VectorMask.check.
        m.check(vsp);
        // offsetInRange is passed through to the masked load unchanged.
        return ScopedMemoryAccess.loadFromMemorySegmentMasked(
                vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
                (AbstractMemorySegmentImpl) ms, offset, m, vsp, offsetInRange,
                // Lane-by-lane form of the same load, under mask vm, reading
                // each lane through memorySegmentGet.
                (msp, off, s, vm) -> {
                    return s.ldLongOp((MemorySegment) msp, off, vm, $abstractvectortype$::memorySegmentGet);
                });
    }

    // Unchecked storing operations in native byte order.
    // Caller is responsible for applying index checks, masking, and
    // byte swapping.

    // Unchecked contiguous store into a $type$[]: no index check is done
    // here; per the section note above, that is the caller's job.
    abstract
    void intoArray0($type$[] a, int offset);
    @ForceInline
    final
    void intoArray0Template($type$[] a, int offset) {
        $Type$Species vsp = vspecies();
        VectorSupport.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset), false,
            this, a, offset,
            // Lane-by-lane form of the same store: lane i goes to arr[off + i].
            (arr, off, v)
            -> v.stOp(arr, (int) off,
                      (arr_, off_, i, e) -> arr_[off_+i] = e));
    }

    // Unchecked masked contiguous store into a $type$[]: no index check is
    // done here; per the section note above, that is the caller's job.
    abstract
    void intoArray0($type$[] a, int offset, VectorMask<$Boxtype$> m);
    @ForceInline
    final
    <M extends VectorMask<$Boxtype$>>
    void intoArray0Template(Class<M> maskClass, $type$[] a, int offset, M m) {
        // NOTE(review): presumably verifies that the mask belongs to this
        // vector's species -- confirm against VectorMask.check.
        m.check(species());
        $Type$Species vsp = vspecies();
        VectorSupport.storeMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset), false,
            this, m, a, offset,
            // Lane-by-lane form of the same store, under mask vm.
            (arr, off, v, vm)
            -> v.stOp(arr, (int) off, vm,
                      (arr_, off_, i, e) -> arr_[off_ + i] = e));
    }

#if[!byteOrShort]
    // Masked scatter into a $type$[] through an index map:
    // lane i goes to a[offset + indexMap[mapOffset + i]] where the mask is set.
    abstract
    void intoArray0($type$[] a, int offset,
                    int[] indexMap, int mapOffset,
                    VectorMask<$Boxtype$> m);
    @ForceInline
    final
    <M extends VectorMask<$Boxtype$>>
    void intoArray0Template(Class<M> maskClass, $type$[] a, int offset,
                            int[] indexMap, int mapOffset, M m) {
        // NOTE(review): presumably verifies that the mask belongs to this
        // vector's species -- confirm against VectorMask.check.
        m.check(species());
        $Type$Species vsp = vspecies();
        // Int species that has this species' index shape.
        IntVector.IntSpecies isp = IntVector.species(vsp.indexShape());
#if[longOrDouble]
        if (vsp.laneCount() == 1) {
            // Single-lane vector: the scatter is just a masked contiguous
            // store at the one mapped index.
            intoArray(a, offset + indexMap[mapOffset], m);
            return;
        }

        // Index vector: vix[0:n] = i -> offset + indexMap[mo + i]
        IntVector vix;
        if (isp.laneCount() != vsp.laneCount()) {
            // For $Type$MaxVector, if the vector length is 2048 bits, the
            // indexShape of the $Type$ species is S_MAX_BIT and the lane
            // count of the $Type$ vector is 32. When converting the $Type$
            // species to an int species, indexShape is still S_MAX_BIT, but
            // the lane count of the int vector is 64. So when loading the
            // index vector (IntVector), only the lower half of the index
            // data is needed.
            vix = IntVector
                .fromArray(isp, indexMap, mapOffset, IntMaxVector.IntMaxMask.LOWER_HALF_TRUE_MASK)
                .add(offset);
        } else {
            vix = IntVector
                .fromArray(isp, indexMap, mapOffset)
                .add(offset);
        }

#else[longOrDouble]
        // Index vector: vix[0:n] = i -> offset + indexMap[mo + i]
        IntVector vix = IntVector
            .fromArray(isp, indexMap, mapOffset)
            .add(offset);
#end[longOrDouble]

        // The check below does not take the mask m into account.
        // FIXME: Check index under mask controlling.
        vix = VectorIntrinsics.checkIndex(vix, a.length);

        VectorSupport.storeWithMap(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            isp.vectorType(),
            a, arrayAddress(a, 0), vix,
            this, m,
            a, offset, indexMap, mapOffset,
            // Lane-by-lane form of the same scatter, under mask vm.
            (arr, off, v, map, mo, vm)
            -> v.stOp(arr, off, vm,
                      (arr_, off_, i, e) -> {
                          int j = map[mo + i];
                          arr[off + j] = e;
                      }));
    }
#end[!byteOrShort]

#if[byte]
    // Masked store into a boolean[]; declared abstract here, so no
    // implementation is visible in this class.
    // NOTE(review): presumably implemented per shape by delegating to
    // intoBooleanArray0Template -- confirm.
    abstract
    void intoBooleanArray0(boolean[] a, int offset, VectorMask<$Boxtype$> m);
    /**
     * Masked store of this vector into a {@code boolean[]} starting at
     * {@code offset}.
     *
     * The mask is checked against this vector's species, then each lane is
     * reduced to its lowest bit before being handed to
     * {@code VectorSupport.storeMasked}.  The Java implementation passed
     * along writes {@code true} for a lane whose lowest bit is set and
     * {@code false} otherwise.
     */
    @ForceInline
    final
    <M extends VectorMask<$Boxtype$>>
    void intoBooleanArray0Template(Class<M> maskClass, boolean[] a, int offset, M m) {
        m.check(species());
        $Type$Species sp = vspecies();
        // Keep only bit 0 of every lane so the stored lane values are 0 or 1.
        ByteVector lowBits = this.and((byte) 1);
        VectorSupport.storeMasked(
            sp.vectorType(), maskClass, sp.elementType(), sp.laneCount(),
            a, booleanArrayAddress(a, offset), false,
            lowBits, m, a, offset,
            (dst, base, vec, mask) -> {
                vec.stOp(dst, (int) base, mask,
                         (out, start, lane, value) -> {
                             out[start + lane] = (value & 1) != 0;
                         });
            });
    }
#end[byte]

    /**
     * Stores this vector into {@code ms} at {@code offset}, without a mask,
     * by way of {@code ScopedMemoryAccess.storeIntoMemorySegment}.
     *
     * The segment is cast to {@code AbstractMemorySegmentImpl}, so passing
     * any other implementation fails with a {@code ClassCastException}.
     */
    @ForceInline
    final
    void intoMemorySegment0(MemorySegment ms, long offset) {
        $Type$Species sp = vspecies();
        ScopedMemoryAccess.storeIntoMemorySegment(
                sp.vectorType(), sp.elementType(), sp.laneCount(),
                this,
                (AbstractMemorySegmentImpl) ms, offset,
                (segment, pos, vec) ->
                    vec.stLongOp((MemorySegment) segment, pos,
                                 $abstractvectortype$::memorySegmentSet));
    }

    // Masked store into a memory segment; declared abstract here, so no
    // implementation is visible in this class.
    // NOTE(review): presumably implemented per shape by delegating to
    // intoMemorySegment0Template -- confirm.
    abstract
    void intoMemorySegment0(MemorySegment bb, long offset, VectorMask<$Boxtype$> m);
    /**
     * Stores this vector into {@code ms} at {@code offset} under mask
     * {@code m}, by way of
     * {@code ScopedMemoryAccess.storeIntoMemorySegmentMasked}.
     *
     * The mask is checked against this vector's species first.  The segment
     * is cast to {@code AbstractMemorySegmentImpl}, so passing any other
     * implementation fails with a {@code ClassCastException}.
     */
    @ForceInline
    final
    <M extends VectorMask<$Boxtype$>>
    void intoMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
        $Type$Species sp = vspecies();
        m.check(sp);
        ScopedMemoryAccess.storeIntoMemorySegmentMasked(
                sp.vectorType(), maskClass, sp.elementType(), sp.laneCount(),
                this, m,
                (AbstractMemorySegmentImpl) ms, offset,
                (segment, pos, vec, mask) ->
                    vec.stLongOp((MemorySegment) segment, pos, mask,
                                 $abstractvectortype$::memorySegmentSet));
    }

#if[short]
    /*package-private*/
    // Masked store into a char[]; declared abstract here, so no
    // implementation is visible in this class.
    // NOTE(review): presumably implemented per shape by delegating to
    // intoCharArray0Template -- confirm.
    abstract
    void intoCharArray0(char[] a, int offset, VectorMask<$Boxtype$> m);
    /**
     * Masked store of this vector into a {@code char[]} starting at
     * {@code offset}.
     *
     * The mask is checked against this vector's species before the store is
     * handed to {@code VectorSupport.storeMasked}.  The Java implementation
     * passed along casts each lane value to {@code char} on its way into the
     * array.
     */
    @ForceInline
    final
    <M extends VectorMask<$Boxtype$>>
    void intoCharArray0Template(Class<M> maskClass, char[] a, int offset, M m) {
        m.check(species());
        $Type$Species sp = vspecies();
        VectorSupport.storeMasked(
            sp.vectorType(), maskClass, sp.elementType(), sp.laneCount(),
            a, charArrayAddress(a, offset), false,
            this, m, a, offset,
            (dst, base, vec, mask) -> {
                vec.stOp(dst, (int) base, mask,
                         (out, start, lane, value) -> {
                             out[start + lane] = (char) value;
                         });
            });
    }
#end[short]

    // End of low-level memory operations.

    /**
     * Throws an {@code AssertionError} when a store of
     * {@code vsp.laneCount() * scale} units starting at {@code offset} would
     * not fit inside {@code [0, limit)}; otherwise does nothing.
     *
     * The end position is computed in {@code long} arithmetic.  With plain
     * {@code int} arithmetic a large {@code offset} could wrap around to a
     * negative sum and slip past the upper-bound test.
     *
     * @param offset the starting position of the store
     * @param vsp the species supplying the lane count
     * @param m the mask, used only in the error message
     * @param scale the multiplier applied to the lane count
     * @param limit the exclusive upper bound for the end of the store
     * @throws AssertionError if the store would fall outside the range
     */
    @ForceInline
    private void conditionalStoreNYI(int offset,
                                     $Type$Species vsp,
                                     VectorMask<$Boxtype$> m,
                                     int scale,
                                     int limit) {
        // The offset test stays first so that vsp is only dereferenced for a
        // non-negative offset, exactly as before.
        if (offset < 0 || (long) offset + (long) vsp.laneCount() * scale > limit) {
            String msg =
                String.format("unimplemented: store @%d in [0..%d), %s in %s",
                              offset, limit, m, vsp);
            throw new AssertionError(msg);
        }
    }

    /*package-private*/
    // Returns this vector unchanged when bo is NATIVE_ENDIAN.  Otherwise the
    // vector is viewed as bytes, rearranged with swapBytesShuffle(), and
    // viewed again as $Type$ lanes.
    // NOTE(review): swapBytesShuffle() presumably reverses the bytes within
    // each lane -- its definition is not visible here, confirm.
    // In the byte specialization the swapping branch is compiled out, so the
    // method always returns this.
    @Override
    @ForceInline
    final
    $abstractvectortype$ maybeSwap(ByteOrder bo) {
#if[!byte]
        if (bo != NATIVE_ENDIAN) {
            return this.reinterpretAsBytes()
                .rearrange(swapBytesShuffle())
                .reinterpretAs$Type$s();
        }
#end[!byte]
        return this;
    }

    // Shift amount derived from the element index scale:
    // 31 - numberOfLeadingZeros(x) is floor(log2(x)) for a positive int, so
    // (index << ARRAY_SHIFT) equals index * scale provided the scale is a
    // power of two.
    static final int ARRAY_SHIFT =
        31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_$TYPE$_INDEX_SCALE);
    // Base offset that arrayAddress adds in front of the scaled index.
    static final long ARRAY_BASE =
        Unsafe.ARRAY_$TYPE$_BASE_OFFSET;

    // Computes ARRAY_BASE plus the index shifted left by ARRAY_SHIFT, in long
    // arithmetic.  The array argument itself is not read.
    @ForceInline
    static long arrayAddress($type$[] a, int index) {
        long scaledIndex = ((long) index) << ARRAY_SHIFT;
        return ARRAY_BASE + scaledIndex;
    }

#if[short]
    // Shift amount derived from the char[] index scale:
    // 31 - numberOfLeadingZeros(x) is floor(log2(x)) for a positive int.
    static final int ARRAY_CHAR_SHIFT =
            31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_CHAR_INDEX_SCALE);
    // Base offset that charArrayAddress adds in front of the scaled index.
    static final long ARRAY_CHAR_BASE =
            Unsafe.ARRAY_CHAR_BASE_OFFSET;

    // Computes ARRAY_CHAR_BASE plus the index shifted left by
    // ARRAY_CHAR_SHIFT, in long arithmetic.  The array argument is not read.
    @ForceInline
    static long charArrayAddress(char[] a, int index) {
        long scaledIndex = ((long) index) << ARRAY_CHAR_SHIFT;
        return ARRAY_CHAR_BASE + scaledIndex;
    }
#end[short]

#if[byte]
    // Shift amount derived from the boolean[] index scale:
    // 31 - numberOfLeadingZeros(x) is floor(log2(x)) for a positive int.
    static final int ARRAY_BOOLEAN_SHIFT =
            31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_BOOLEAN_INDEX_SCALE);
    // Base offset that booleanArrayAddress adds in front of the scaled index.
    static final long ARRAY_BOOLEAN_BASE =
            Unsafe.ARRAY_BOOLEAN_BASE_OFFSET;

    // Computes ARRAY_BOOLEAN_BASE plus the index shifted left by
    // ARRAY_BOOLEAN_SHIFT, in long arithmetic.  The array argument is not
    // read.
    @ForceInline
    static long booleanArrayAddress(boolean[] a, int index) {
        long scaledIndex = ((long) index) << ARRAY_BOOLEAN_SHIFT;
        return ARRAY_BOOLEAN_BASE + scaledIndex;
    }
#end[byte]

    // Computes Unsafe.ARRAY_BYTE_BASE_OFFSET plus the index.  The array
    // argument is not read.
    // The index is widened explicitly, as the other *ArrayAddress helpers in
    // this file do, so the sum is always computed in long arithmetic whatever
    // the declared type of the Unsafe constant.
    @ForceInline
    static long byteArrayAddress(byte[] a, int index) {
        return Unsafe.ARRAY_BYTE_BASE_OFFSET + (long) index;
    }

    // ================================================

    /// Reinterpreting view methods:
    //   lanewise reinterpret: viewAsXVector()
    //   keep shape, redraw lanes: reinterpretAsEs()

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @ForceInline
    @Override
    public final ByteVector reinterpretAsBytes() {
#if[byte]
        // Already a byte vector: nothing to reinterpret.
        return this;
#else[byte]
         // Going to ByteVector, pay close attention to byte order.
         // The raw view below is returned as-is; the assert records that
         // this relies on REGISTER_ENDIAN being little-endian.
         assert(REGISTER_ENDIAN == ByteOrder.LITTLE_ENDIAN);
         return asByteVectorRaw();
         // NOTE(review): the disabled line below presumably shows the byte
         // swap that would be needed for another register byte order --
         // confirm before relying on it.
         //return asByteVectorRaw().rearrange(swapBytesShuffle());
#end[byte]
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @ForceInline
    @Override
    public final $Bitstype$Vector viewAsIntegralLanes() {
#if[BITWISE]
        // Lanes are already integral: return this vector itself.
        return this;
#else[BITWISE]
        // Look up the integral lane type matching this lane type and
        // take the raw view of this vector with that lane type.
        LaneType ilt = LaneType.$TYPE$.asIntegral();
        return ($Bitstype$Vector) asVectorRaw(ilt);
#end[BITWISE]
    }

    /**
     * {@inheritDoc} <!--workaround-->
#if[byteOrShort]
     *
     * @implNote This method always throws
     * {@code UnsupportedOperationException}, because there is no floating
     * point type of the same size as {@code $type$}.  The return type
     * of this method is arbitrarily designated as
     * {@code Vector<?>}.  Future versions of this API may change the return
     * type if additional floating point types become available.
#end[byteOrShort]
     */
    @ForceInline
    @Override
    public final
    {#if[byteOrShort]?Vector<?>:$Fptype$Vector}
    viewAsFloatingLanes() {
#if[FP]
        // Lanes are already floating point: return this vector itself.
        return this;
#else[FP]
        // Look up the floating-point lane type matching this lane type.
        LaneType flt = LaneType.$TYPE$.asFloating();
#if[!byteOrShort]
        // Take the raw view of this vector with that lane type.
        return ($Fptype$Vector) asVectorRaw(flt);
#else[!byteOrShort]
        // asFloating() will throw UnsupportedOperationException for the unsupported type $type$
        // so this statement only exists to satisfy the compiler's
        // requirement that the method end with a return or throw.
        throw new AssertionError("Cannot reach here");
#end[!byteOrShort]
#end[FP]
    }

    // ================================================

    /// Object methods: toString, equals, hashCode
    //
    // Object methods are defined as if via Arrays.toString, etc.,
    // is applied to the array of elements.  Two equal vectors
    // are required to have equal species and equal lane values.

    /**
     * Returns a string representation of this vector, of the form
     * {@code "[0,1,2...]"}, listing the lane values of this vector
     * in lane order.
     *
     * The result is the same as passing the {@code $type$} array returned by
     * {@link #toArray this.toArray()} to {@link
     * java.util.Arrays#toString($type$[]) Arrays.toString()}.
     *
     * @return a string of the form {@code "[0,1,2...]"}
     * reporting the lane values of this vector
     */
    @Override
    @ForceInline
    public final
    String toString() {
        // toArray is strongly typed, so the matching Arrays overload applies.
        $type$[] lanes = toArray();
        return Arrays.toString(lanes);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    boolean equals(Object obj) {
        // Anything that is not a Vector (including null) is unequal.
        if (!(obj instanceof Vector)) {
            return false;
        }
        Vector<?> other = (Vector<?>) obj;
        // Vectors of different species are unequal.
        if (!this.species().equals(other.species())) {
            return false;
        }
        // Same species: equal when the lanewise comparison is true everywhere.
        return this.eq(other.check(this.species())).allTrue();
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    int hashCode() {
        // Combine the species with the hash of the strongly typed lane array.
        $type$[] lanes = toArray();
        return Objects.hash(species(), Arrays.hashCode(lanes));
    }

    // ================================================

    // Species

    /**
     * Class representing {@link $abstractvectortype$}'s of the same {@link VectorShape VectorShape}.
     */
    /*package-private*/
    static final class $Type$Species extends AbstractSpecies<$Boxtype$> {
        // Passes the shape, the lane type for $type$, and the concrete
        // vector/mask/shuffle classes plus the vector factory up to
        // AbstractSpecies, then sanity-checks the element size in bits.
        private $Type$Species(VectorShape shape,
                Class<? extends $abstractvectortype$> vectorType,
                Class<? extends AbstractMask<$Boxtype$>> maskType,
                Class<? extends AbstractShuffle<$Boxtype$>> shuffleType,
                Function<Object, $abstractvectortype$> vectorFactory) {
            super(shape, LaneType.of($type$.class),
                  vectorType, maskType, shuffleType,
                  vectorFactory);
            assert(this.elementSize() == $Boxtype$.SIZE);
        }

        // Specializing overrides:

        // Returns the primitive class object, typed as Class<$Boxtype$>.
        @Override
        @ForceInline
        public final Class<$Boxtype$> elementType() {
            return $type$.class;
        }

        // Returns the box class object (as opposed to the primitive class
        // returned by elementType()).
        @Override
        @ForceInline
        final Class<$Boxtype$> genericElementType() {
            return $Boxtype$.class;
        }

        // Narrows the inherited vectorType field to this element type; the
        // unchecked cast is what the SuppressWarnings is for.
        @SuppressWarnings("unchecked")
        @Override
        @ForceInline
        public final Class<? extends $Type$Vector> vectorType() {
            return (Class<? extends $Type$Vector>) vectorType;
        }

        // Returns e unchanged; longToElementBits is called purely so that it
        // can throw when e is not representable.
        @Override
        @ForceInline
        public final long checkValue(long e) {
            longToElementBits(e);  // only for exception
            return e;
        }

        /*package-private*/
        // Builds a vector from a raw bit pattern in MODE_BROADCAST.  The Java
        // implementation passed along fills every lane with the same bits
        // via rvOp.
        @Override
        @ForceInline
        final $abstractvectortype$ broadcastBits(long bits) {
            return ($abstractvectortype$)
                VectorSupport.fromBitsCoerced(
                    vectorType, $type$.class, laneCount,
                    bits, MODE_BROADCAST, this,
                    (bits_, s_) -> s_.rvOp(i -> bits_));
        }

        /*package-private*/
        // Broadcasts a $type$ value by converting it with toBits and
        // delegating to broadcastBits.
        @ForceInline
        {#if[long]?public }final $abstractvectortype$ broadcast($type$ e) {
            return broadcastBits(toBits(e));
        }

#if[!long]
        // Broadcasts a long value; longToElementBits throws if the value
        // does not survive the conversion to $type$.
        @Override
        @ForceInline
        public final $abstractvectortype$ broadcast(long e) {
            return broadcastBits(longToElementBits(e));
        }
#end[!long]

        /*package-private*/
        // Converts a long to the bit pattern of the corresponding $type$
        // element, rejecting values that change when cast to $type$ and back.
        final @Override
        @ForceInline
        long longToElementBits(long value) {
#if[long]
            // In this case, the conversion can never fail.
            return value;
#else[long]
            // Do the conversion, and then test it for failure.
            $type$ e = ($type$) value;
            if ((long) e != value) {
                throw badElementBits(value, e);
            }
            return toBits(e);
#end[long]
        }

        /*package-private*/
        // Converts e to an int (when convertToInt is true) or a long, and
        // throws the exception built by badArrayBits if casting the result
        // back to $type$ does not reproduce e.
        @ForceInline
        static long toIntegralChecked($type$ e, boolean convertToInt) {
            long value = convertToInt ? (int) e : (long) e;
            if (($type$) value != e) {
                throw badArrayBits(e, convertToInt, value);
            }
            return value;
        }

        /* this non-public one is for internal conversions */
        // Builds a vector from int values, one per lane.  Each value is cast
        // to $type$ and must cast back to the same int, otherwise the
        // exception built by badElementBits is thrown.
        // NOTE(review): requireLength presumably rejects an array whose
        // length differs from laneCount -- its definition is not visible
        // here, confirm.
        @Override
        @ForceInline
        final $abstractvectortype$ fromIntValues(int[] values) {
            VectorIntrinsics.requireLength(values.length, laneCount);
            $type$[] va = new $type$[laneCount()];
            for (int i = 0; i < va.length; i++) {
                int lv = values[i];
                $type$ v = ($type$) lv;
                va[i] = v;
                if ((int)v != lv) {
                    throw badElementBits(lv, v);
                }
            }
            return dummyVector().fromArray0(va, 0);
        }

        // Virtual constructors

        @ForceInline
        @Override final
        public $abstractvectortype$ fromArray(Object a, int offset) {
            // User entry point
            // Defer only to the equivalent method on the vector class, using the same inputs
            // The cast fails with ClassCastException if a is not a $type$[].
            return $abstractvectortype$
                .fromArray(this, ($type$[]) a, offset);
        }

        @ForceInline
        @Override final
        public $abstractvectortype$ fromMemorySegment(MemorySegment ms, long offset, ByteOrder bo) {
            // User entry point
            // Defer only to the equivalent method on the vector class, using the same inputs
            return $abstractvectortype$
                .fromMemorySegment(this, ms, offset, bo);
        }

        // Narrows the inherited dummyVector() result to this vector type.
        @ForceInline
        @Override final
        $abstractvectortype$ dummyVector() {
            return ($abstractvectortype$) super.dummyVector();
        }

        /*package-private*/
        // Builds a vector lane by lane from raw bits: f supplies the bits
        // for lane i and fromBits turns them into a $type$ value.
        final @Override
        @ForceInline
        $abstractvectortype$ rvOp(RVOp f) {
            $type$[] res = new $type$[laneCount()];
            for (int i = 0; i < res.length; i++) {
                $bitstype$ bits = {#if[!long]?($bitstype$)} f.apply(i);
                res[i] = fromBits(bits);
            }
            return dummyVector().vectorFactory(res);
        }

        // Builds a vector lane by lane from the values f returns for each
        // lane index.
        $Type$Vector vOp(FVOp f) {
            $type$[] res = new $type$[laneCount()];
            for (int i = 0; i < res.length; i++) {
                res[i] = f.apply(i);
            }
            return dummyVector().vectorFactory(res);
        }

        // Masked variant: f is only invoked for lanes whose mask bit is set;
        // the remaining lanes keep the default zero value of the new array.
        $Type$Vector vOp(VectorMask<$Boxtype$> m, FVOp f) {
            $type$[] res = new $type$[laneCount()];
            boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
            for (int i = 0; i < res.length; i++) {
                if (mbits[i]) {
                    res[i] = f.apply(i);
                }
            }
            return dummyVector().vectorFactory(res);
        }

        // The ldOp/ldLongOp/stOp/stLongOp methods below all forward their
        // arguments unchanged to the same-named method on dummyVector().

        /*package-private*/
        @ForceInline
        <M> $abstractvectortype$ ldOp(M memory, int offset,
                                      FLdOp<M> f) {
            return dummyVector().ldOp(memory, offset, f);
        }

        /*package-private*/
        @ForceInline
        <M> $abstractvectortype$ ldOp(M memory, int offset,
                                      VectorMask<$Boxtype$> m,
                                      FLdOp<M> f) {
            return dummyVector().ldOp(memory, offset, m, f);
        }

        /*package-private*/
        @ForceInline
        $abstractvectortype$ ldLongOp(MemorySegment memory, long offset,
                                      FLdLongOp f) {
            return dummyVector().ldLongOp(memory, offset, f);
        }

        /*package-private*/
        @ForceInline
        $abstractvectortype$ ldLongOp(MemorySegment memory, long offset,
                                      VectorMask<$Boxtype$> m,
                                      FLdLongOp f) {
            return dummyVector().ldLongOp(memory, offset, m, f);
        }

        /*package-private*/
        @ForceInline
        <M> void stOp(M memory, int offset, FStOp<M> f) {
            dummyVector().stOp(memory, offset, f);
        }

        /*package-private*/
        @ForceInline
        <M> void stOp(M memory, int offset,
                      AbstractMask<$Boxtype$> m,
                      FStOp<M> f) {
            dummyVector().stOp(memory, offset, m, f);
        }

        /*package-private*/
        @ForceInline
        void stLongOp(MemorySegment memory, long offset, FStLongOp f) {
            dummyVector().stLongOp(memory, offset, f);
        }

        /*package-private*/
        @ForceInline
        void stLongOp(MemorySegment memory, long offset,
                      AbstractMask<$Boxtype$> m,
                      FStLongOp f) {
            dummyVector().stLongOp(memory, offset, m, f);
        }

        // N.B. Make sure these constant vectors and
        // masks load up correctly into registers.
        //
        // Also, see if we can avoid all that switching.
        // Could we cache both vectors and both masks in
        // this species object?

        // In zero(), iota() and maskAll() the Max species is recognized
        // first by its vector class; the remaining species are selected by
        // vector bit size.  Falling out of the switch means an unexpected
        // bit size and raises AssertionError.

        // Zero and iota vector access
        @Override
        @ForceInline
        public final $abstractvectortype$ zero() {
            if ((Class<?>) vectorType() == $Type$MaxVector.class)
                return $Type$MaxVector.ZERO;
            switch (vectorBitSize()) {
                case 64: return $Type$64Vector.ZERO;
                case 128: return $Type$128Vector.ZERO;
                case 256: return $Type$256Vector.ZERO;
                case 512: return $Type$512Vector.ZERO;
            }
            throw new AssertionError();
        }

        @Override
        @ForceInline
        public final $abstractvectortype$ iota() {
            if ((Class<?>) vectorType() == $Type$MaxVector.class)
                return $Type$MaxVector.IOTA;
            switch (vectorBitSize()) {
                case 64: return $Type$64Vector.IOTA;
                case 128: return $Type$128Vector.IOTA;
                case 256: return $Type$256Vector.IOTA;
                case 512: return $Type$512Vector.IOTA;
            }
            throw new AssertionError();
        }

        // Mask access
        @Override
        @ForceInline
        public final VectorMask<$Boxtype$> maskAll(boolean bit) {
            if ((Class<?>) vectorType() == $Type$MaxVector.class)
                return $Type$MaxVector.$Type$MaxMask.maskAll(bit);
            switch (vectorBitSize()) {
                case 64: return $Type$64Vector.$Type$64Mask.maskAll(bit);
                case 128: return $Type$128Vector.$Type$128Mask.maskAll(bit);
                case 256: return $Type$256Vector.$Type$256Mask.maskAll(bit);
                case 512: return $Type$512Vector.$Type$512Mask.maskAll(bit);
            }
            throw new AssertionError();
        }
    }

    /**
     * Looks up the {@code $type$} species that has the given shape.
     *
     * @param s the shape
     * @return a species for an element type of {@code $type$} and shape
     * @throws IllegalArgumentException if no such species exists for the shape
     */
    static $Type$Species species(VectorShape s) {
        Objects.requireNonNull(s);
        // Select the public constant first, then narrow it in one place.
        final VectorSpecies<$Boxtype$> match;
        switch (s.switchKey) {
            case VectorShape.SK_64_BIT:
                match = SPECIES_64;
                break;
            case VectorShape.SK_128_BIT:
                match = SPECIES_128;
                break;
            case VectorShape.SK_256_BIT:
                match = SPECIES_256;
                break;
            case VectorShape.SK_512_BIT:
                match = SPECIES_512;
                break;
            case VectorShape.SK_Max_BIT:
                match = SPECIES_MAX;
                break;
            default:
                throw new IllegalArgumentException("Bad shape: " + s);
        }
        return ($Type$Species) match;
    }

    // The species constants below are declared as VectorSpecies<$Boxtype$>,
    // but each fixed-shape one is constructed as a $Type$Species, which is
    // why species(VectorShape) can cast them back to $Type$Species.

    /** Species representing {@link $Type$Vector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
    public static final VectorSpecies<$Boxtype$> SPECIES_64
        = new $Type$Species(VectorShape.S_64_BIT,
                            $Type$64Vector.class,
                            $Type$64Vector.$Type$64Mask.class,
                            $Type$64Vector.$Type$64Shuffle.class,
                            $Type$64Vector::new);

    /** Species representing {@link $Type$Vector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
    public static final VectorSpecies<$Boxtype$> SPECIES_128
        = new $Type$Species(VectorShape.S_128_BIT,
                            $Type$128Vector.class,
                            $Type$128Vector.$Type$128Mask.class,
                            $Type$128Vector.$Type$128Shuffle.class,
                            $Type$128Vector::new);

    /** Species representing {@link $Type$Vector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
    public static final VectorSpecies<$Boxtype$> SPECIES_256
        = new $Type$Species(VectorShape.S_256_BIT,
                            $Type$256Vector.class,
                            $Type$256Vector.$Type$256Mask.class,
                            $Type$256Vector.$Type$256Shuffle.class,
                            $Type$256Vector::new);

    /** Species representing {@link $Type$Vector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
    public static final VectorSpecies<$Boxtype$> SPECIES_512
        = new $Type$Species(VectorShape.S_512_BIT,
                            $Type$512Vector.class,
                            $Type$512Vector.$Type$512Mask.class,
                            $Type$512Vector.$Type$512Shuffle.class,
                            $Type$512Vector::new);

    /** Species representing {@link $Type$Vector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
    public static final VectorSpecies<$Boxtype$> SPECIES_MAX
        = new $Type$Species(VectorShape.S_Max_BIT,
                            $Type$MaxVector.class,
                            $Type$MaxVector.$Type$MaxMask.class,
                            $Type$MaxVector.$Type$MaxShuffle.class,
                            $Type$MaxVector::new);

    /**
     * Preferred species for {@link $Type$Vector}s.
     * A preferred species is a species of maximal bit-size for the platform.
     */
    // The cast makes class initialization fail with ClassCastException if
    // ofPreferred ever returns something other than a $Type$Species.
    public static final VectorSpecies<$Boxtype$> SPECIES_PREFERRED
        = ($Type$Species) VectorSpecies.ofPreferred($type$.class);
}
}

