From c4661cc8eb9e1d0d10bc8953fb5fb6277590a132 Mon Sep 17 00:00:00 2001 From: Vaughn Iverson Date: Mon, 15 Nov 2021 13:03:15 -0800 Subject: [PATCH 1/6] Relax validation of SCATTER/GATHER memory ops --- operand/checks.go | 2 +- pass/verify.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/operand/checks.go b/operand/checks.go index ba1760bb..31a8491e 100644 --- a/operand/checks.go +++ b/operand/checks.go @@ -263,7 +263,7 @@ func IsVmz(op Op) bool { func isvm(op Op, idx func(Op) bool) bool { m, ok := op.(Mem) - return ok && IsR64(m.Base) && idx(m.Index) + return ok && (m.Base == nil || IsR64(m.Base)) && idx(m.Index) } // IsREL8 returns true if op is an 8-bit offset relative to instruction pointer. diff --git a/pass/verify.go b/pass/verify.go index 1e7b3683..2c524737 100644 --- a/pass/verify.go +++ b/pass/verify.go @@ -20,7 +20,7 @@ func VerifyMemOperands(i *ir.Instruction) error { continue } - if m.Base == nil { + if m.Base == nil && !(operand.IsVmx(m) || operand.IsVmy(m) || operand.IsVmz(m)) { return errors.New("bad memory operand: missing base register") } From 7b06e10e2d303bad941385e0f2d88049c5997d9c Mon Sep 17 00:00:00 2001 From: Vaughn Iverson Date: Wed, 14 Dec 2022 17:05:39 -0800 Subject: [PATCH 2/6] Added issue193 test case in fixedbugs --- tests/fixedbugs/issue193/asm.go | 66 +++++++++++++++++++++++ tests/fixedbugs/issue193/doc.go | 18 +++++++ tests/fixedbugs/issue193/issue193.s | 44 +++++++++++++++ tests/fixedbugs/issue193/issue193_test.go | 22 ++++++++ tests/fixedbugs/issue193/stub.go | 9 ++++ 5 files changed, 159 insertions(+) create mode 100644 tests/fixedbugs/issue193/asm.go create mode 100644 tests/fixedbugs/issue193/doc.go create mode 100644 tests/fixedbugs/issue193/issue193.s create mode 100644 tests/fixedbugs/issue193/issue193_test.go create mode 100644 tests/fixedbugs/issue193/stub.go diff --git a/tests/fixedbugs/issue193/asm.go b/tests/fixedbugs/issue193/asm.go new file mode 100644 index 00000000..a9199a44 --- /dev/null +++ b/tests/fixedbugs/issue193/asm.go @@ -0,0 +1,66 @@ +//go:build ignore +// +build ignore + +package main + +import ( + . "github.com/mmcloughlin/avo/build" + . "github.com/mmcloughlin/avo/operand" + . "github.com/mmcloughlin/avo/reg" +) + +var offQ Mem + +func generateAddSub(useBase bool) { + + funcName := "AddSubPairs" + if !useBase { + funcName += "NoBase" + } + + TEXT(funcName, 0, "func(a *[8]int64)") + Doc("Destructively Add/Subtract successive pairs") + offsets, evenVals, oddVals, result := YMM(), YMM(), YMM(), YMM() + + VMOVDQU64(offQ, offsets) + var aMem Mem + + if useBase { + aPtr := Load(Param("a"), GP64()) + aMem = Mem{Base: aPtr, Index: offsets, Scale: 8} + } else { + aPtrMem, err := Param("a").Resolve() + if err != nil { + panic(err) + } + VPSLLQ(Imm(3), offsets, offsets) + VPADDQ_BCST(aPtrMem.Addr, offsets, offsets) + aMem = Mem{Index: offsets, Scale: 1} + } + + k := K() + KXNORB(K0, K0, k) + VPGATHERQQ(aMem, k, evenVals) + KXNORB(K0, K0, k) + VPGATHERQQ(aMem.Offset(8), k, oddVals) + VPADDQ(oddVals, evenVals, result) + KXNORB(K0, K0, k) + VPSCATTERQQ(result, k, aMem) + VPSUBQ(oddVals, evenVals, result) + KXNORB(K0, K0, k) + VPSCATTERQQ(result, k, aMem.Offset(8)) + RET() +} + +func main() { + + offQ = GLOBL("offQ", RODATA|NOPTR) + for i := 0; i < 4; i++ { + DATA(i*8, U64(2*i)) + } + + generateAddSub(true) + generateAddSub(false) + + Generate() +} diff --git a/tests/fixedbugs/issue193/doc.go b/tests/fixedbugs/issue193/doc.go new file mode 100644 index 00000000..0acb1ef6 --- /dev/null +++ b/tests/fixedbugs/issue193/doc.go @@ -0,0 +1,18 @@ +// Package issue193 tests for the ability to generate VPSCATTER{D,Q}{D,Q} and +// VPGATHER{D,Q}{D,Q} instructions *without* specifying a base register in +// the destination/source VSIB addressing (VM{64,32}{x,y,z}) memory operands. +// +// From: https://www.felixcloutier.com/x86/vpscatterdd:vpscatterdq:vpscatterqd:vpscatterqq +// +// BASE_ADDR stands for the memory operand base address (a GPR); *may not exist* +// VINDEX stands for the memory operand vector of indices (a ZMM register) +// SCALE stands for the memory operand scalar (1, 2, 4 or 8) +// DISP is the optional 1 or 4 byte displacement +// +// For example, prior to this fix Avo requires: +// VPSCATTERQQ Z6, K1, 8(BX)(Z7*1) +// with BX = 0, when: +// VPSCATTERQQ Z6, K1, 8(Z7*1) +// is perfectly valid and will suffice. + +package issue193 diff --git a/tests/fixedbugs/issue193/issue193.s b/tests/fixedbugs/issue193/issue193.s new file mode 100644 index 00000000..8dfeedc6 --- /dev/null +++ b/tests/fixedbugs/issue193/issue193.s @@ -0,0 +1,44 @@ +// Code generated by command: go run asm.go -out issue193.s -stubs stub.go. DO NOT EDIT. + +#include "textflag.h" + +DATA offQ<>+0(SB)/8, $0x0000000000000000 +DATA offQ<>+8(SB)/8, $0x0000000000000002 +DATA offQ<>+16(SB)/8, $0x0000000000000004 +DATA offQ<>+24(SB)/8, $0x0000000000000006 +GLOBL offQ<>(SB), RODATA|NOPTR, $32 + +// func AddSubPairs(a *[8]int64) +// Requires: AVX2, AVX512DQ, AVX512F, AVX512VL +TEXT ·AddSubPairs(SB), $0-8 + VMOVDQU64 offQ<>+0(SB), Y0 + MOVQ a+0(FP), AX + KXNORB K0, K0, K1 + VPGATHERQQ (AX)(Y0*8), K1, Y1 + KXNORB K0, K0, K1 + VPGATHERQQ 8(AX)(Y0*8), K1, Y2 + VPADDQ Y2, Y1, Y3 + KXNORB K0, K0, K1 + VPSCATTERQQ Y3, K1, (AX)(Y0*8) + VPSUBQ Y2, Y1, Y3 + KXNORB K0, K0, K1 + VPSCATTERQQ Y3, K1, 8(AX)(Y0*8) + RET + +// func AddSubPairsNoBase(a *[8]int64) +// Requires: AVX2, AVX512DQ, AVX512F, AVX512VL +TEXT ·AddSubPairsNoBase(SB), $0-8 + VMOVDQU64 offQ<>+0(SB), Y0 + VPSLLQ $0x03, Y0, Y0 + VPADDQ.BCST a+0(FP), Y0, Y0 + KXNORB K0, K0, K1 + VPGATHERQQ (Y0*1), K1, Y1 + KXNORB K0, K0, K1 + VPGATHERQQ 8(Y0*1), K1, Y2 + VPADDQ Y2, Y1, Y3 + KXNORB K0, K0, K1 + VPSCATTERQQ Y3, K1, (Y0*1) + VPSUBQ Y2, Y1, Y3 + KXNORB K0, K0, K1 + VPSCATTERQQ Y3, K1, 8(Y0*1) + RET diff --git a/tests/fixedbugs/issue193/issue193_test.go b/tests/fixedbugs/issue193/issue193_test.go new file mode 100644 index 00000000..14d977d9 --- /dev/null +++ b/tests/fixedbugs/issue193/issue193_test.go @@ -0,0 +1,22 @@ +package issue193 + +import "testing" + +//go:generate go run asm.go -out issue193.s -stubs stub.go + +func TestAddSubs(t *testing.T) { + in := [8]int64{7, 6, 5, 4, 3, 2, 1, 0} + out := [8]int64{13, 1, 9, 1, 5, 1, 1, 1} + + val := in + AddSubPairs(&val) + if val != out { + t.Errorf("Bad result %v", val) + } + + val = in + AddSubPairsNoBase(&val) + if val != out { + t.Errorf("Bad result %v", val) + } +} diff --git a/tests/fixedbugs/issue193/stub.go b/tests/fixedbugs/issue193/stub.go new file mode 100644 index 00000000..b7c0da25 --- /dev/null +++ b/tests/fixedbugs/issue193/stub.go @@ -0,0 +1,9 @@ +// Code generated by command: go run asm.go -out issue193.s -stubs stub.go. DO NOT EDIT. + +package issue193 + +// Destructively Add/Subtract successive pairs +func AddSubPairs(a *[8]int64) + +// Destructively Add/Subtract successive pairs +func AddSubPairsNoBase(a *[8]int64) From d85a48897373f7a2641850b7a64d8d7d01663333 Mon Sep 17 00:00:00 2001 From: Vaughn Iverson Date: Wed, 14 Dec 2022 17:13:21 -0800 Subject: [PATCH 3/6] Lint ate some whitespace in asm.go --- tests/fixedbugs/issue193/asm.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/fixedbugs/issue193/asm.go b/tests/fixedbugs/issue193/asm.go index a9199a44..7eb3f4ae 100644 --- a/tests/fixedbugs/issue193/asm.go +++ b/tests/fixedbugs/issue193/asm.go @@ -12,7 +12,6 @@ import ( var offQ Mem func generateAddSub(useBase bool) { - funcName := "AddSubPairs" if !useBase { funcName += "NoBase" @@ -53,7 +52,6 @@ func generateAddSub(useBase bool) { } func main() { - offQ = GLOBL("offQ", RODATA|NOPTR) for i := 0; i < 4; i++ { DATA(i*8, U64(2*i)) From 298c0413b623362e515e8b1416eaeaf5345212bc Mon Sep 17 00:00:00 2001 From: Vaughn Iverson Date: Wed, 14 Dec 2022 17:24:29 -0800 Subject: [PATCH 4/6] Fix failing test due to no AVX512 support in CI --- tests/fixedbugs/issue193/issue193_test.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/fixedbugs/issue193/issue193_test.go b/tests/fixedbugs/issue193/issue193_test.go index 14d977d9..6a72ef93 100644 --- a/tests/fixedbugs/issue193/issue193_test.go +++ b/tests/fixedbugs/issue193/issue193_test.go @@ -1,10 +1,19 @@ package issue193 -import "testing" +import ( + "testing" + + "golang.org/x/sys/cpu" +) //go:generate go run asm.go -out issue193.s -stubs stub.go func TestAddSubs(t *testing.T) { + + if !(cpu.X86.HasAVX2 && cpu.X86.HasAVX512F && cpu.X86.HasAVX512DQ && cpu.X86.HasAVX512VL) { + t.Skipf("Skipping issue193 test because runtime lacks needed CPUID features") + } + in := [8]int64{7, 6, 5, 4, 3, 2, 1, 0} out := [8]int64{13, 1, 9, 1, 5, 1, 1, 1} From 170fe8acac153f1c2233f0ef7690b94a4e4c9b6f Mon Sep 17 00:00:00 2001 From: Vaughn Iverson Date: Wed, 14 Dec 2022 17:25:54 -0800 Subject: [PATCH 5/6] More whitespace eaten by linter --- tests/fixedbugs/issue193/issue193_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/fixedbugs/issue193/issue193_test.go b/tests/fixedbugs/issue193/issue193_test.go index 6a72ef93..e3d291a8 100644 --- a/tests/fixedbugs/issue193/issue193_test.go +++ b/tests/fixedbugs/issue193/issue193_test.go @@ -9,7 +9,6 @@ import ( //go:generate go run asm.go -out issue193.s -stubs stub.go func TestAddSubs(t *testing.T) { - if !(cpu.X86.HasAVX2 && cpu.X86.HasAVX512F && cpu.X86.HasAVX512DQ && cpu.X86.HasAVX512VL) { t.Skipf("Skipping issue193 test because runtime lacks needed CPUID features") } From 42874d6aebef87ec335f4cbc771bb00a06c0efc3 Mon Sep 17 00:00:00 2001 From: Vaughn Iverson Date: Wed, 14 Dec 2022 17:34:10 -0800 Subject: [PATCH 6/6] Fixed package comment in doc.go --- tests/fixedbugs/issue193/doc.go | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/fixedbugs/issue193/doc.go b/tests/fixedbugs/issue193/doc.go index 0acb1ef6..bae58a3d 100644 --- a/tests/fixedbugs/issue193/doc.go +++ b/tests/fixedbugs/issue193/doc.go @@ -1,7 +1,8 @@ // Package issue193 tests for the ability to generate VPSCATTER{D,Q}{D,Q} and // VPGATHER{D,Q}{D,Q} instructions *without* specifying a base register in // the destination/source VSIB addressing (VM{64,32}{x,y,z}) memory operands. -// +package issue193 + // From: https://www.felixcloutier.com/x86/vpscatterdd:vpscatterdq:vpscatterqd:vpscatterqq // // BASE_ADDR stands for the memory operand base address (a GPR); *may not exist* @@ -10,9 +11,11 @@ // DISP is the optional 1 or 4 byte displacement // // For example, prior to this fix Avo requires: -// VPSCATTERQQ Z6, K1, 8(BX)(Z7*1) +// +// VPSCATTERQQ Z6, K1, 8(BX)(Z7*1) +// // with BX = 0, when: -// VPSCATTERQQ Z6, K1, 8(Z7*1) +// +// VPSCATTERQQ Z6, K1, 8(Z7*1) +// // is perfectly valid and will suffice. - -package issue193