diff --git a/operand/checks.go b/operand/checks.go index ba1760bb..31a8491e 100644 --- a/operand/checks.go +++ b/operand/checks.go @@ -263,7 +263,7 @@ func IsVmz(op Op) bool { func isvm(op Op, idx func(Op) bool) bool { m, ok := op.(Mem) - return ok && IsR64(m.Base) && idx(m.Index) + return ok && (m.Base == nil || IsR64(m.Base)) && idx(m.Index) } // IsREL8 returns true if op is an 8-bit offset relative to instruction pointer. diff --git a/pass/verify.go b/pass/verify.go index 1e7b3683..2c524737 100644 --- a/pass/verify.go +++ b/pass/verify.go @@ -20,7 +20,7 @@ func VerifyMemOperands(i *ir.Instruction) error { continue } - if m.Base == nil { + if m.Base == nil && !(operand.IsVmx(m) || operand.IsVmy(m) || operand.IsVmz(m)) { return errors.New("bad memory operand: missing base register") } diff --git a/tests/fixedbugs/issue193/asm.go b/tests/fixedbugs/issue193/asm.go new file mode 100644 index 00000000..7eb3f4ae --- /dev/null +++ b/tests/fixedbugs/issue193/asm.go @@ -0,0 +1,64 @@ +//go:build ignore +// +build ignore + +package main + +import ( + . "github.com/mmcloughlin/avo/build" + . "github.com/mmcloughlin/avo/operand" + . "github.com/mmcloughlin/avo/reg" +) + +var offQ Mem + +func generateAddSub(useBase bool) { + funcName := "AddSubPairs" + if !useBase { + funcName += "NoBase" + } + + TEXT(funcName, 0, "func(a *[8]int64)") + Doc("Destructively Add/Subtract successive pairs") + offsets, evenVals, oddVals, result := YMM(), YMM(), YMM(), YMM() + + VMOVDQU64(offQ, offsets) + var aMem Mem + + if useBase { + aPtr := Load(Param("a"), GP64()) + aMem = Mem{Base: aPtr, Index: offsets, Scale: 8} + } else { + aPtrMem, err := Param("a").Resolve() + if err != nil { + panic(err) + } + VPSLLQ(Imm(3), offsets, offsets) + VPADDQ_BCST(aPtrMem.Addr, offsets, offsets) + aMem = Mem{Index: offsets, Scale: 1} + } + + k := K() + KXNORB(K0, K0, k) + VPGATHERQQ(aMem, k, evenVals) + KXNORB(K0, K0, k) + VPGATHERQQ(aMem.Offset(8), k, oddVals) + VPADDQ(oddVals, evenVals, result) + KXNORB(K0, K0, k) + VPSCATTERQQ(result, k, aMem) + VPSUBQ(oddVals, evenVals, result) + KXNORB(K0, K0, k) + VPSCATTERQQ(result, k, aMem.Offset(8)) + RET() +} + +func main() { + offQ = GLOBL("offQ", RODATA|NOPTR) + for i := 0; i < 4; i++ { + DATA(i*8, U64(2*i)) + } + + generateAddSub(true) + generateAddSub(false) + + Generate() +} diff --git a/tests/fixedbugs/issue193/doc.go b/tests/fixedbugs/issue193/doc.go new file mode 100644 index 00000000..bae58a3d --- /dev/null +++ b/tests/fixedbugs/issue193/doc.go @@ -0,0 +1,21 @@ +// Package issue193 tests for the ability to generate VPSCATTER{D,Q}{D,Q} and +// VPGATHER{D,Q}{D,Q} instructions *without* specifying a base register in +// the destination/source VSIB addressing (VM{64,32}{x,y,z}) memory operands. +package issue193 + +// From: https://www.felixcloutier.com/x86/vpscatterdd:vpscatterdq:vpscatterqd:vpscatterqq +// +// BASE_ADDR stands for the memory operand base address (a GPR); *may not exist* +// VINDEX stands for the memory operand vector of indices (a ZMM register) +// SCALE stands for the memory operand scalar (1, 2, 4 or 8) +// DISP is the optional 1 or 4 byte displacement +// +// For example, prior to this fix Avo requires: +// +// VPSCATTERQQ Z6, K1, 8(BX)(Z7*1) +// +// with BX = 0, when: +// +// VPSCATTERQQ Z6, K1, 8(Z7*1) +// +// is perfectly valid and will suffice. diff --git a/tests/fixedbugs/issue193/issue193.s b/tests/fixedbugs/issue193/issue193.s new file mode 100644 index 00000000..8dfeedc6 --- /dev/null +++ b/tests/fixedbugs/issue193/issue193.s @@ -0,0 +1,44 @@ +// Code generated by command: go run asm.go -out issue193.s -stubs stub.go. DO NOT EDIT. + +#include "textflag.h" + +DATA offQ<>+0(SB)/8, $0x0000000000000000 +DATA offQ<>+8(SB)/8, $0x0000000000000002 +DATA offQ<>+16(SB)/8, $0x0000000000000004 +DATA offQ<>+24(SB)/8, $0x0000000000000006 +GLOBL offQ<>(SB), RODATA|NOPTR, $32 + +// func AddSubPairs(a *[8]int64) +// Requires: AVX2, AVX512DQ, AVX512F, AVX512VL +TEXT ·AddSubPairs(SB), $0-8 + VMOVDQU64 offQ<>+0(SB), Y0 + MOVQ a+0(FP), AX + KXNORB K0, K0, K1 + VPGATHERQQ (AX)(Y0*8), K1, Y1 + KXNORB K0, K0, K1 + VPGATHERQQ 8(AX)(Y0*8), K1, Y2 + VPADDQ Y2, Y1, Y3 + KXNORB K0, K0, K1 + VPSCATTERQQ Y3, K1, (AX)(Y0*8) + VPSUBQ Y2, Y1, Y3 + KXNORB K0, K0, K1 + VPSCATTERQQ Y3, K1, 8(AX)(Y0*8) + RET + +// func AddSubPairsNoBase(a *[8]int64) +// Requires: AVX2, AVX512DQ, AVX512F, AVX512VL +TEXT ·AddSubPairsNoBase(SB), $0-8 + VMOVDQU64 offQ<>+0(SB), Y0 + VPSLLQ $0x03, Y0, Y0 + VPADDQ.BCST a+0(FP), Y0, Y0 + KXNORB K0, K0, K1 + VPGATHERQQ (Y0*1), K1, Y1 + KXNORB K0, K0, K1 + VPGATHERQQ 8(Y0*1), K1, Y2 + VPADDQ Y2, Y1, Y3 + KXNORB K0, K0, K1 + VPSCATTERQQ Y3, K1, (Y0*1) + VPSUBQ Y2, Y1, Y3 + KXNORB K0, K0, K1 + VPSCATTERQQ Y3, K1, 8(Y0*1) + RET diff --git a/tests/fixedbugs/issue193/issue193_test.go b/tests/fixedbugs/issue193/issue193_test.go new file mode 100644 index 00000000..e3d291a8 --- /dev/null +++ b/tests/fixedbugs/issue193/issue193_test.go @@ -0,0 +1,30 @@ +package issue193 + +import ( + "testing" + + "golang.org/x/sys/cpu" +) + +//go:generate go run asm.go -out issue193.s -stubs stub.go + +func TestAddSubs(t *testing.T) { + if !(cpu.X86.HasAVX2 && cpu.X86.HasAVX512F && cpu.X86.HasAVX512DQ && cpu.X86.HasAVX512VL) { + t.Skipf("Skipping issue193 test because runtime lacks needed CPUID features") + } + + in := [8]int64{7, 6, 5, 4, 3, 2, 1, 0} + out := [8]int64{13, 1, 9, 1, 5, 1, 1, 1} + + val := in + AddSubPairs(&val) + if val != out { + t.Errorf("Bad result %v", val) + } + + val = in + AddSubPairsNoBase(&val) + if val != out { + t.Errorf("Bad result %v", val) + } +} diff --git a/tests/fixedbugs/issue193/stub.go b/tests/fixedbugs/issue193/stub.go new file mode 100644 index 00000000..b7c0da25 --- /dev/null +++ b/tests/fixedbugs/issue193/stub.go @@ -0,0 +1,9 @@ +// Code generated by command: go run asm.go -out issue193.s -stubs stub.go. DO NOT EDIT. + +package issue193 + +// Destructively Add/Subtract successive pairs +func AddSubPairs(a *[8]int64) + +// Destructively Add/Subtract successive pairs +func AddSubPairsNoBase(a *[8]int64)