ia32-64/x86/vpshldv.html
2025-07-08 02:23:29 -03:00

229 lines
8.8 KiB
HTML
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:svg="http://www.w3.org/2000/svg" xmlns:x86="http://www.felixcloutier.com/x86"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><link rel="stylesheet" type="text/css" href="style.css"></link><title>VPSHLDV
— Concatenate and Variable Shift Packed Data Left Logical</title></head><body><header><nav><ul><li><a href='index.html'>Index</a></li><li>December 2023</li></ul></nav></header><h1>VPSHLDV
— Concatenate and Variable Shift Packed Data Left Logical</h1>
<table>
<tr>
<th>Opcode/Instruction</th>
<th>Op/En</th>
<th>64/32 bit Mode Support</th>
<th>CPUID Feature Flag</th>
<th>Description</th></tr>
<tr>
<td>EVEX.128.66.0F38.W1 70 /r VPSHLDVW xmm1{k1}{z}, xmm2, xmm3/m128</td>
<td>A</td>
<td>V/V</td>
<td>AVX512_VBMI2 AVX512VL</td>
<td>Concatenate xmm1 and xmm2, extract result shifted to the left by value in xmm3/m128 into xmm1.</td></tr>
<tr>
<td>EVEX.256.66.0F38.W1 70 /r VPSHLDVW ymm1{k1}{z}, ymm2, ymm3/m256</td>
<td>A</td>
<td>V/V</td>
<td>AVX512_VBMI2 AVX512VL</td>
<td>Concatenate ymm1 and ymm2, extract result shifted to the left by value in xmm3/m256 into ymm1.</td></tr>
<tr>
<td>EVEX.512.66.0F38.W1 70 /r VPSHLDVW zmm1{k1}{z}, zmm2, zmm3/m512</td>
<td>A</td>
<td>V/V</td>
<td>AVX512_VBMI2</td>
<td>Concatenate zmm1 and zmm2, extract result shifted to the left by value in zmm3/m512 into zmm1.</td></tr>
<tr>
<td>EVEX.128.66.0F38.W0 71 /r VPSHLDVD xmm1{k1}{z}, xmm2, xmm3/m128/m32bcst</td>
<td>B</td>
<td>V/V</td>
<td>AVX512_VBMI2 AVX512VL</td>
<td>Concatenate xmm1 and xmm2, extract result shifted to the left by value in xmm3/m128 into xmm1.</td></tr>
<tr>
<td>EVEX.256.66.0F38.W0 71 /r VPSHLDVD ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst</td>
<td>B</td>
<td>V/V</td>
<td>AVX512_VBMI2 AVX512VL</td>
<td>Concatenate ymm1 and ymm2, extract result shifted to the left by value in xmm3/m256 into ymm1.</td></tr>
<tr>
<td>EVEX.512.66.0F38.W0 71 /r VPSHLDVD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst</td>
<td>B</td>
<td>V/V</td>
<td>AVX512_VBMI2</td>
<td>Concatenate zmm1 and zmm2, extract result shifted to the left by value in zmm3/m512 into zmm1.</td></tr>
<tr>
<td>EVEX.128.66.0F38.W1 71 /r VPSHLDVQ xmm1{k1}{z}, xmm2, xmm3/m128/m64bcst</td>
<td>B</td>
<td>V/V</td>
<td>AVX512_VBMI2 AVX512VL</td>
<td>Concatenate xmm1 and xmm2, extract result shifted to the left by value in xmm3/m128 into xmm1.</td></tr>
<tr>
<td>EVEX.256.66.0F38.W1 71 /r VPSHLDVQ ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst</td>
<td>B</td>
<td>V/V</td>
<td>AVX512_VBMI2 AVX512VL</td>
<td>Concatenate ymm1 and ymm2, extract result shifted to the left by value in xmm3/m256 into ymm1.</td></tr>
<tr>
<td>EVEX.512.66.0F38.W1 71 /r VPSHLDVQ zmm1{k1}{z}, zmm2, zmm3/m512/m64bcst</td>
<td>B</td>
<td>V/V</td>
<td>AVX512_VBMI2</td>
<td>Concatenate zmm1 and zmm2, extract result shifted to the left by value in zmm3/m512 into zmm1.</td></tr></table>
<h2 id="instruction-operand-encoding">Instruction Operand Encoding<a class="anchor" href="#instruction-operand-encoding">
</a></h2>
<table>
<tr>
<th>Op/En</th>
<th>Tuple</th>
<th>Operand 1</th>
<th>Operand 2</th>
<th>Operand 3</th>
<th>Operand 4</th></tr>
<tr>
<td>A</td>
<td>Full Mem</td>
<td>ModRM:reg (r, w)</td>
<td>EVEX.vvvv (r)</td>
<td>ModRM:r/m (r)</td>
<td>N/A</td></tr>
<tr>
<td>B</td>
<td>Full</td>
<td>ModRM:reg (r, w)</td>
<td>EVEX.vvvv (r)</td>
<td>ModRM:r/m (r)</td>
<td>N/A</td></tr></table>
<h3 id="description">Description<a class="anchor" href="#description">
</a></h3>
<p>Concatenate packed data, extract result shifted to the left by variable value.</p>
<p>This instruction supports memory fault suppression.</p>
<h3 id="operation">Operation<a class="anchor" href="#operation">
</a></h3>
<pre>FUNCTION concat(a,b):
IF words:
d.word[1] := a
d.word[0] := b
return d
ELSE IF dwords:
q.dword[1] := a
q.dword[0] := b
return q
ELSE IF qwords:
o.qword[1] := a
o.qword[0] := b
return o
</pre>
<h4 id="vpshldvw-dest--src2--src3">VPSHLDVW DEST, SRC2, SRC3<a class="anchor" href="#vpshldvw-dest--src2--src3">
</a></h4>
<pre>(KL, VL) = (8, 128), (16, 256), (32, 512)
FOR j := 0 TO KL-1:
IF MaskBit(j) OR *no writemask*:
tmp := concat(DEST.word[j], SRC2.word[j]) &lt;&lt; (SRC3.word[j] &amp; 15)
DEST.word[j] := tmp.word[1]
ELSE IF *zeroing*:
DEST.word[j] := 0
*ELSE DEST.word[j] remains unchanged*
DEST[MAX_VL-1:VL] := 0
</pre>
<h4 id="vpshldvd-dest--src2--src3">VPSHLDVD DEST, SRC2, SRC3<a class="anchor" href="#vpshldvd-dest--src2--src3">
</a></h4>
<pre>(KL, VL) = (4, 128), (8, 256), (16, 512)
FOR j := 0 TO KL-1:
IF SRC3 is broadcast memop:
tsrc3 := SRC3.dword[0]
ELSE:
tsrc3 := SRC3.dword[j]
IF MaskBit(j) OR *no writemask*:
tmp := concat(DEST.dword[j], SRC2.dword[j]) &lt;&lt; (tsrc3 &amp; 31)
DEST.dword[j] := tmp.dword[1]
ELSE IF *zeroing*:
DEST.dword[j] := 0
*ELSE DEST.dword[j] remains unchanged*
DEST[MAX_VL-1:VL] := 0
</pre>
<h4 id="vpshldvq-dest--src2--src3">VPSHLDVQ DEST, SRC2, SRC3<a class="anchor" href="#vpshldvq-dest--src2--src3">
</a></h4>
<pre>(KL, VL) = (2, 128), (4, 256), (8, 512)
FOR j := 0 TO KL-1:
IF SRC3 is broadcast memop:
tsrc3 := SRC3.qword[0]
ELSE:
tsrc3 := SRC3.qword[j]
IF MaskBit(j) OR *no writemask*:
tmp := concat(DEST.qword[j], SRC2.qword[j]) &lt;&lt; (tsrc3 &amp; 63)
DEST.qword[j] := tmp.qword[1]
ELSE IF *zeroing*:
DEST.qword[j] := 0
*ELSE DEST.qword[j] remains unchanged*
DEST[MAX_VL-1:VL] := 0
</pre>
<h3 id="intel-c-c++-compiler-intrinsic-equivalent">Intel C/C++ Compiler Intrinsic Equivalent<a class="anchor" href="#intel-c-c++-compiler-intrinsic-equivalent">
</a></h3>
<pre>VPSHLDVW __m128i _mm_shldv_epi16(__m128i, __m128i, __m128i);
</pre>
<pre>VPSHLDVW __m128i _mm_mask_shldv_epi16(__m128i, __mmask8, __m128i, __m128i);
</pre>
<pre>VPSHLDVW __m128i _mm_maskz_shldv_epi16(__mmask8, __m128i, __m128i, __m128i);
</pre>
<pre>VPSHLDVW __m256i _mm256_shldv_epi16(__m256i, __m256i, __m256i);
</pre>
<pre>VPSHLDVW __m256i _mm256_mask_shldv_epi16(__m256i, __mmask16, __m256i, __m256i);
</pre>
<pre>VPSHLDVW __m256i _mm256_maskz_shldv_epi16(__mmask16, __m256i, __m256i, __m256i);
</pre>
<pre>VPSHLDVQ __m512i _mm512_shldv_epi64(__m512i, __m512i, __m512i);
</pre>
<pre>VPSHLDVQ __m512i _mm512_mask_shldv_epi64(__m512i, __mmask8, __m512i, __m512i);
</pre>
<pre>VPSHLDVQ __m512i _mm512_maskz_shldv_epi64(__mmask8, __m512i, __m512i, __m512i);
</pre>
<pre>VPSHLDVW __m128i _mm_shldv_epi16(__m128i, __m128i, __m128i);
</pre>
<pre>VPSHLDVW __m128i _mm_mask_shldv_epi16(__m128i, __mmask8, __m128i, __m128i);
</pre>
<pre>VPSHLDVW __m128i _mm_maskz_shldv_epi16(__mmask8, __m128i, __m128i, __m128i);
</pre>
<pre>VPSHLDVW __m256i _mm256_shldv_epi16(__m256i, __m256i, __m256i);
</pre>
<pre>VPSHLDVW __m256i _mm256_mask_shldv_epi16(__m256i, __mmask16, __m256i, __m256i);
</pre>
<pre>VPSHLDVW __m256i _mm256_maskz_shldv_epi16(__mmask16, __m256i, __m256i, __m256i);
</pre>
<pre>VPSHLDVW __m512i _mm512_shldv_epi16(__m512i, __m512i, __m512i);
</pre>
<pre>VPSHLDVW __m512i _mm512_mask_shldv_epi16(__m512i, __mmask32, __m512i, __m512i);
</pre>
<pre>VPSHLDVW __m512i _mm512_maskz_shldv_epi16(__mmask32, __m512i, __m512i, __m512i);
</pre>
<pre>VPSHLDVD __m128i _mm_shldv_epi32(__m128i, __m128i, __m128i);
</pre>
<pre>VPSHLDVD __m128i _mm_mask_shldv_epi32(__m128i, __mmask8, __m128i, __m128i);
</pre>
<pre>VPSHLDVD __m128i _mm_maskz_shldv_epi32(__mmask8, __m128i, __m128i, __m128i);
</pre>
<pre>VPSHLDVD __m256i _mm256_shldv_epi32(__m256i, __m256i, __m256i);
</pre>
<pre>VPSHLDVD __m256i _mm256_mask_shldv_epi32(__m256i, __mmask8, __m256i, __m256i);
</pre>
<pre>VPSHLDVD __m256i _mm256_maskz_shldv_epi32(__mmask8, __m256i, __m256i, __m256i);
</pre>
<pre>VPSHLDVD __m512i _mm512_shldv_epi32(__m512i, __m512i, __m512i);
</pre>
<pre>VPSHLDVD __m512i _mm512_mask_shldv_epi32(__m512i, __mmask16, __m512i, __m512i);
</pre>
<pre>VPSHLDVD __m512i _mm512_maskz_shldv_epi32(__mmask16, __m512i, __m512i, __m512i);
</pre>
<h3 class="exceptions" id="simd-floating-point-exceptions">SIMD Floating-Point Exceptions<a class="anchor" href="#simd-floating-point-exceptions">
</a></h3>
<p>None.</p>
<h3 class="exceptions" id="other-exceptions">Other Exceptions<a class="anchor" href="#other-exceptions">
</a></h3>
<p>See <span class="not-imported">Table 2-49</span>, “Type E4 Class Exception Conditions.”</p><footer><p>
This UNOFFICIAL, mechanically-separated, non-verified reference is provided for convenience, but it may be
inc<span style="opacity: 0.2">omp</span>lete or b<sub>r</sub>oke<sub>n</sub> in various obvious or non-obvious
ways. Refer to <a href="https://software.intel.com/en-us/download/intel-64-and-ia-32-architectures-sdm-combined-volumes-1-2a-2b-2c-2d-3a-3b-3c-3d-and-4">Intel® 64 and IA-32 Architectures Software Developers Manual</a> for anything serious.
</p></footer></body></html>