about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Roberto Oliveira <robertoguimaraes8@gmail.com> 2017-09-26 15:56:27 +0000
committer: Roberto Oliveira <robertoguimaraes8@gmail.com> 2017-09-26 16:00:50 +0000
commit: f82e428d9cae98be063c203a97199116cfbbfd76 (patch)
tree: 8255f8dd54f64c98b8d4476c487cd404a78f13f2
parent: 1b4e6c6f176d0fe213264a91719096cc1fbb50d7 (diff)
download: aports-f82e428d9cae98be063c203a97199116cfbbfd76.tar.bz2
          aports-f82e428d9cae98be063c203a97199116cfbbfd76.tar.xz
community/go: backport ppc64le fix for regression in PPC64.rules
When a MOVDstorezero (8 bytes) is used, the offset field in the instruction must be a multiple of 4. This had been corrected in the rules for other types of stores, but not for the zero case.
-rw-r--r-- community/go/APKBUILD 6
-rw-r--r-- community/go/cmd-compile-fix-regression-in-PPC64.rules-move-zero.patch 700
2 files changed, 704 insertions, 2 deletions
diff --git a/community/go/APKBUILD b/community/go/APKBUILD
index e2f3ed1880..c95fae8788 100644
--- a/community/go/APKBUILD
+++ b/community/go/APKBUILD
@@ -4,7 +4,7 @@ pkgname=go
pkgver=1.9
# This should be the latest commit on the corresponding release branch
_toolsver="5d2fd3ccab986d52112bf301d47a819783339d0e"
-pkgrel=2
+pkgrel=3
pkgdesc="Go programming language compiler"
url="http://www.golang.org/"
arch="all"
@@ -21,6 +21,7 @@ source="http://golang.org/dl/go${pkgver/_/}.src.tar.gz
default-buildmode-pie.patch
set-external-linker.patch
make-sure-R0-is-zero-before-main-on-ppc64le.patch
+ cmd-compile-fix-regression-in-PPC64.rules-move-zero.patch
"
# NOTE: building go for x86 with grsec kernel requires:
@@ -147,4 +148,5 @@ sha512sums="70c4b892b6883fb21fc1a547a2b8d174df8c7aca282a3906e3816b4442b16c5da578
d679873082dbac6a47b7c43ac74c47cb19616fb053a4faa3e6ee78004ece86aa5c432fba3d24c030d47396089d7ec7c6357af5648bd767341056396066ff9a04 go-tools-1.9.tar.gz
ef9da66d76e4698314eaf2aa88cf40a8430a15f8f6cb9ad17ee9d72498ec049b60e63e1ff5acf13a916c5ea365f9d9a282b6f2b06e28de9e3484eb9a3d7dd98e default-buildmode-pie.patch
6b36f3780ab10e5c4902473a8ab5c0417220ece584b537517e9e60979bdc5a548ed14dd2546605392c89ec5ea6691769d337d34e2e19b92eba5bbca1898f4ada set-external-linker.patch
-451ca02dea91d74d8e3216c7a48d963bbfc040b43d15868087d6339a1815c4996817c5ace265a20abcdb9d1da4e9ff58e373397d98df773b729876f4623b1cc8 make-sure-R0-is-zero-before-main-on-ppc64le.patch"
+451ca02dea91d74d8e3216c7a48d963bbfc040b43d15868087d6339a1815c4996817c5ace265a20abcdb9d1da4e9ff58e373397d98df773b729876f4623b1cc8 make-sure-R0-is-zero-before-main-on-ppc64le.patch
+9e7c8e6132cd7ab22a72fc92c4f7489b25e0f501efc796c8d07f282b178873ce0c51b173c6cf2c18ca42e24e0fec8a47244eb8c93022e37758705f0a77ed96b1 cmd-compile-fix-regression-in-PPC64.rules-move-zero.patch"
diff --git a/community/go/cmd-compile-fix-regression-in-PPC64.rules-move-zero.patch b/community/go/cmd-compile-fix-regression-in-PPC64.rules-move-zero.patch
new file mode 100644
index 0000000000..cc2ba079ed
--- /dev/null
+++ b/community/go/cmd-compile-fix-regression-in-PPC64.rules-move-zero.patch
@@ -0,0 +1,700 @@
+From 3fda3765b8691de51d62ef6d68ce772f4eb0c601 Mon Sep 17 00:00:00 2001
+From: Lynn Boger <laboger@linux.vnet.ibm.com>
+Date: Tue, 19 Sep 2017 17:36:57 -0400
+Subject: [PATCH] cmd/compile: fix regression in PPC64.rules move zero
+
+When a MOVDstorezero (8 bytes) is used the offset field
+in the instruction must be a multiple of 4. This situation
+had been corrected in the rules for other types of stores
+but not for the zero case.
+
+This also removes some of the special MOVDstorezero cases since
+they can be handled by the general LowerZero case.
+
+Updates made to the ssa test for lowering zero moves to
+include cases where the target is not aligned to at least 4.
+
+Fixes #21947
+
+Change-Id: I7cceceb1be4898c77cd3b5e78b58dce0a7e28edd
+Reviewed-on: https://go-review.googlesource.com/64970
+Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
+TryBot-Result: Gobot Gobot <gobot@golang.org>
+Reviewed-by: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
+Reviewed-by: Cherry Zhang <cherryyz@google.com>
+---
+ .../compile/internal/gc/testdata/gen/zeroGen.go | 63 ++++++
+ src/cmd/compile/internal/gc/testdata/zero.go | 216 +++++++++++++++++++++
+ src/cmd/compile/internal/ssa/gen/PPC64.rules | 58 ++----
+ src/cmd/compile/internal/ssa/rewritePPC64.go | 172 +++++-----------
+ 4 files changed, 350 insertions(+), 159 deletions(-)
+
+diff --git a/src/cmd/compile/internal/gc/testdata/gen/zeroGen.go b/src/cmd/compile/internal/gc/testdata/gen/zeroGen.go
+index fa70b16495..c764c369e6 100644
+--- a/src/cmd/compile/internal/gc/testdata/gen/zeroGen.go
++++ b/src/cmd/compile/internal/gc/testdata/gen/zeroGen.go
+@@ -19,6 +19,7 @@ import (
+ // will be written into the parent directory containing the tests.
+
+ var sizes = [...]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 16, 17, 23, 24, 25, 31, 32, 33, 63, 64, 65, 1023, 1024, 1025}
++var usizes = [...]int{8, 16, 24, 32, 64, 256}
+
+ func main() {
+ w := new(bytes.Buffer)
+@@ -61,12 +62,74 @@ func main() {
+ fmt.Fprintf(w, "}\n")
+ }
+
++ for _, s := range usizes {
++ // type for test
++ fmt.Fprintf(w, "type T%du1 struct {\n", s)
++ fmt.Fprintf(w, " b bool\n")
++ fmt.Fprintf(w, " val [%d]byte\n", s)
++ fmt.Fprintf(w, "}\n")
++
++ fmt.Fprintf(w, "type T%du2 struct {\n", s)
++ fmt.Fprintf(w, " i uint16\n")
++ fmt.Fprintf(w, " val [%d]byte\n", s)
++ fmt.Fprintf(w, "}\n")
++
++ // function being tested
++ fmt.Fprintf(w, "//go:noinline\n")
++ fmt.Fprintf(w, "func zero%du1_ssa(t *T%du1) {\n", s, s)
++ fmt.Fprintf(w, " t.val = [%d]byte{}\n", s)
++ fmt.Fprintf(w, "}\n")
++
++ // function being tested
++ fmt.Fprintf(w, "//go:noinline\n")
++ fmt.Fprintf(w, "func zero%du2_ssa(t *T%du2) {\n", s, s)
++ fmt.Fprintf(w, " t.val = [%d]byte{}\n", s)
++ fmt.Fprintf(w, "}\n")
++
++ // testing harness
++ fmt.Fprintf(w, "func testZero%du() {\n", s)
++ fmt.Fprintf(w, " a := T%du1{false, [%d]byte{", s, s)
++ for i := 0; i < s; i++ {
++ fmt.Fprintf(w, "255,")
++ }
++ fmt.Fprintf(w, "}}\n")
++ fmt.Fprintf(w, " zero%du1_ssa(&a)\n", s)
++ fmt.Fprintf(w, " want := T%du1{false, [%d]byte{", s, s)
++ for i := 0; i < s; i++ {
++ fmt.Fprintf(w, "0,")
++ }
++ fmt.Fprintf(w, "}}\n")
++ fmt.Fprintf(w, " if a != want {\n")
++ fmt.Fprintf(w, " fmt.Printf(\"zero%du2 got=%%v, want %%v\\n\", a, want)\n", s)
++ fmt.Fprintf(w, " failed=true\n")
++ fmt.Fprintf(w, " }\n")
++ fmt.Fprintf(w, " b := T%du2{15, [%d]byte{", s, s)
++ for i := 0; i < s; i++ {
++ fmt.Fprintf(w, "255,")
++ }
++ fmt.Fprintf(w, "}}\n")
++ fmt.Fprintf(w, " zero%du2_ssa(&b)\n", s)
++ fmt.Fprintf(w, " wantb := T%du2{15, [%d]byte{", s, s)
++ for i := 0; i < s; i++ {
++ fmt.Fprintf(w, "0,")
++ }
++ fmt.Fprintf(w, "}}\n")
++ fmt.Fprintf(w, " if b != wantb {\n")
++ fmt.Fprintf(w, " fmt.Printf(\"zero%du2 got=%%v, want %%v\\n\", b, wantb)\n", s)
++ fmt.Fprintf(w, " failed=true\n")
++ fmt.Fprintf(w, " }\n")
++ fmt.Fprintf(w, "}\n")
++ }
++
+ // boilerplate at end
+ fmt.Fprintf(w, "var failed bool\n")
+ fmt.Fprintf(w, "func main() {\n")
+ for _, s := range sizes {
+ fmt.Fprintf(w, " testZero%d()\n", s)
+ }
++ for _, s := range usizes {
++ fmt.Fprintf(w, " testZero%du()\n", s)
++ }
+ fmt.Fprintf(w, " if failed {\n")
+ fmt.Fprintf(w, " panic(\"failed\")\n")
+ fmt.Fprintf(w, " }\n")
+diff --git a/src/cmd/compile/internal/gc/testdata/zero.go b/src/cmd/compile/internal/gc/testdata/zero.go
+index f6354868cb..9d261aa401 100644
+--- a/src/cmd/compile/internal/gc/testdata/zero.go
++++ b/src/cmd/compile/internal/gc/testdata/zero.go
+@@ -505,6 +505,216 @@ func testZero1025() {
+ }
+ }
+
++type T8u1 struct {
++ b bool
++ val [8]byte
++}
++type T8u2 struct {
++ i uint16
++ val [8]byte
++}
++
++//go:noinline
++func zero8u1_ssa(t *T8u1) {
++ t.val = [8]byte{}
++}
++
++//go:noinline
++func zero8u2_ssa(t *T8u2) {
++ t.val = [8]byte{}
++}
++func testZero8u() {
++ a := T8u1{false, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
++ zero8u1_ssa(&a)
++ want := T8u1{false, [8]byte{0, 0, 0, 0, 0, 0, 0, 0}}
++ if a != want {
++ fmt.Printf("zero8u2 got=%v, want %v\n", a, want)
++ failed = true
++ }
++ b := T8u2{15, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
++ zero8u2_ssa(&b)
++ wantb := T8u2{15, [8]byte{0, 0, 0, 0, 0, 0, 0, 0}}
++ if b != wantb {
++ fmt.Printf("zero8u2 got=%v, want %v\n", b, wantb)
++ failed = true
++ }
++}
++
++type T16u1 struct {
++ b bool
++ val [16]byte
++}
++type T16u2 struct {
++ i uint16
++ val [16]byte
++}
++
++//go:noinline
++func zero16u1_ssa(t *T16u1) {
++ t.val = [16]byte{}
++}
++
++//go:noinline
++func zero16u2_ssa(t *T16u2) {
++ t.val = [16]byte{}
++}
++func testZero16u() {
++ a := T16u1{false, [16]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}
++ zero16u1_ssa(&a)
++ want := T16u1{false, [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}
++ if a != want {
++ fmt.Printf("zero16u2 got=%v, want %v\n", a, want)
++ failed = true
++ }
++ b := T16u2{15, [16]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}
++ zero16u2_ssa(&b)
++ wantb := T16u2{15, [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}
++ if b != wantb {
++ fmt.Printf("zero16u2 got=%v, want %v\n", b, wantb)
++ failed = true
++ }
++}
++
++type T24u1 struct {
++ b bool
++ val [24]byte
++}
++type T24u2 struct {
++ i uint16
++ val [24]byte
++}
++
++//go:noinline
++func zero24u1_ssa(t *T24u1) {
++ t.val = [24]byte{}
++}
++
++//go:noinline
++func zero24u2_ssa(t *T24u2) {
++ t.val = [24]byte{}
++}
++func testZero24u() {
++ a := T24u1{false, [24]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}
++ zero24u1_ssa(&a)
++ want := T24u1{false, [24]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}
++ if a != want {
++ fmt.Printf("zero24u2 got=%v, want %v\n", a, want)
++ failed = true
++ }
++ b := T24u2{15, [24]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}
++ zero24u2_ssa(&b)
++ wantb := T24u2{15, [24]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}
++ if b != wantb {
++ fmt.Printf("zero24u2 got=%v, want %v\n", b, wantb)
++ failed = true
++ }
++}
++
++type T32u1 struct {
++ b bool
++ val [32]byte
++}
++type T32u2 struct {
++ i uint16
++ val [32]byte
++}
++
++//go:noinline
++func zero32u1_ssa(t *T32u1) {
++ t.val = [32]byte{}
++}
++
++//go:noinline
++func zero32u2_ssa(t *T32u2) {
++ t.val = [32]byte{}
++}
++func testZero32u() {
++ a := T32u1{false, [32]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}
++ zero32u1_ssa(&a)
++ want := T32u1{false, [32]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}
++ if a != want {
++ fmt.Printf("zero32u2 got=%v, want %v\n", a, want)
++ failed = true
++ }
++ b := T32u2{15, [32]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}
++ zero32u2_ssa(&b)
++ wantb := T32u2{15, [32]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}
++ if b != wantb {
++ fmt.Printf("zero32u2 got=%v, want %v\n", b, wantb)
++ failed = true
++ }
++}
++
++type T64u1 struct {
++ b bool
++ val [64]byte
++}
++type T64u2 struct {
++ i uint16
++ val [64]byte
++}
++
++//go:noinline
++func zero64u1_ssa(t *T64u1) {
++ t.val = [64]byte{}
++}
++
++//go:noinline
++func zero64u2_ssa(t *T64u2) {
++ t.val = [64]byte{}
++}
++func testZero64u() {
++ a := T64u1{false, [64]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}
++ zero64u1_ssa(&a)
++ want := T64u1{false, [64]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}
++ if a != want {
++ fmt.Printf("zero64u2 got=%v, want %v\n", a, want)
++ failed = true
++ }
++ b := T64u2{15, [64]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}
++ zero64u2_ssa(&b)
++ wantb := T64u2{15, [64]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}
++ if b != wantb {
++ fmt.Printf("zero64u2 got=%v, want %v\n", b, wantb)
++ failed = true
++ }
++}
++
++type T256u1 struct {
++ b bool
++ val [256]byte
++}
++type T256u2 struct {
++ i uint16
++ val [256]byte
++}
++
++//go:noinline
++func zero256u1_ssa(t *T256u1) {
++ t.val = [256]byte{}
++}
++
++//go:noinline
++func zero256u2_ssa(t *T256u2) {
++ t.val = [256]byte{}
++}
++func testZero256u() {
++ a := T256u1{false, [256]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}
++ zero256u1_ssa(&a)
++ want := T256u1{false, [256]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}
++ if a != want {
++ fmt.Printf("zero256u2 got=%v, want %v\n", a, want)
++ failed = true
++ }
++ b := T256u2{15, [256]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}
++ zero256u2_ssa(&b)
++ wantb := T256u2{15, [256]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}
++ if b != wantb {
++ fmt.Printf("zero256u2 got=%v, want %v\n", b, wantb)
++ failed = true
++ }
++}
++
+ var failed bool
+
+ func main() {
+@@ -533,6 +743,12 @@ func main() {
+ testZero1023()
+ testZero1024()
+ testZero1025()
++ testZero8u()
++ testZero16u()
++ testZero24u()
++ testZero32u()
++ testZero64u()
++ testZero256u()
+ if failed {
+ panic("failed")
+ }
+diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules
+index 0a09724b8e..6e8d335c90 100644
+--- a/src/cmd/compile/internal/ssa/gen/PPC64.rules
++++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules
+@@ -572,49 +572,29 @@
+ (MOVBstorezero [6] destptr
+ (MOVHstorezero [4] destptr
+ (MOVWstorezero destptr mem)))
+-(Zero [8] destptr mem) ->
+- (MOVDstorezero destptr mem)
+
+-// Zero small numbers of words directly.
+-(Zero [12] destptr mem) ->
++// MOVD for store with DS must have offsets that are multiple of 4
++(Zero [8] {t} destptr mem) && t.(*types.Type).Alignment()%4 == 0 ->
++ (MOVDstorezero destptr mem)
++(Zero [8] destptr mem) ->
++ (MOVWstorezero [4] destptr
++ (MOVWstorezero [0] destptr mem))
++// Handle these cases only if aligned properly, otherwise use general case below
++(Zero [12] {t} destptr mem) && t.(*types.Type).Alignment()%4 == 0 ->
+ (MOVWstorezero [8] destptr
+ (MOVDstorezero [0] destptr mem))
+-(Zero [16] destptr mem) ->
+- (MOVDstorezero [8] destptr
++(Zero [16] {t} destptr mem) && t.(*types.Type).Alignment()%4 == 0 ->
++ (MOVDstorezero [8] destptr
+ (MOVDstorezero [0] destptr mem))
+-(Zero [24] destptr mem) ->
+- (MOVDstorezero [16] destptr
+- (MOVDstorezero [8] destptr
+- (MOVDstorezero [0] destptr mem)))
+-(Zero [32] destptr mem) ->
+- (MOVDstorezero [24] destptr
+- (MOVDstorezero [16] destptr
+- (MOVDstorezero [8] destptr
+- (MOVDstorezero [0] destptr mem))))
+-
+-(Zero [40] destptr mem) ->
+- (MOVDstorezero [32] destptr
+- (MOVDstorezero [24] destptr
+- (MOVDstorezero [16] destptr
+- (MOVDstorezero [8] destptr
+- (MOVDstorezero [0] destptr mem)))))
+-
+-(Zero [48] destptr mem) ->
+- (MOVDstorezero [40] destptr
+- (MOVDstorezero [32] destptr
+- (MOVDstorezero [24] destptr
+- (MOVDstorezero [16] destptr
+- (MOVDstorezero [8] destptr
+- (MOVDstorezero [0] destptr mem))))))
+-
+-(Zero [56] destptr mem) ->
+- (MOVDstorezero [48] destptr
+- (MOVDstorezero [40] destptr
+- (MOVDstorezero [32] destptr
+- (MOVDstorezero [24] destptr
+- (MOVDstorezero [16] destptr
+- (MOVDstorezero [8] destptr
+- (MOVDstorezero [0] destptr mem)))))))
++(Zero [24] {t} destptr mem) && t.(*types.Type).Alignment()%4 == 0 ->
++ (MOVDstorezero [16] destptr
++ (MOVDstorezero [8] destptr
++ (MOVDstorezero [0] destptr mem)))
++(Zero [32] {t} destptr mem) && t.(*types.Type).Alignment()%4 == 0 ->
++ (MOVDstorezero [24] destptr
++ (MOVDstorezero [16] destptr
++ (MOVDstorezero [8] destptr
++ (MOVDstorezero [0] destptr mem))))
+
+ // Handle cases not handled above
+ (Zero [s] ptr mem) -> (LoweredZero [s] ptr mem)
+diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go
+index 53d457f224..7167c9516b 100644
+--- a/src/cmd/compile/internal/ssa/rewritePPC64.go
++++ b/src/cmd/compile/internal/ssa/rewritePPC64.go
+@@ -12688,35 +12688,39 @@ func rewriteValuePPC64_OpZero_0(v *Value) bool {
+ v.AddArg(v0)
+ return true
+ }
+- // match: (Zero [8] destptr mem)
+- // cond:
++ // match: (Zero [8] {t} destptr mem)
++ // cond: t.(*types.Type).Alignment()%4 == 0
+ // result: (MOVDstorezero destptr mem)
+ for {
+ if v.AuxInt != 8 {
+ break
+ }
++ t := v.Aux
+ _ = v.Args[1]
+ destptr := v.Args[0]
+ mem := v.Args[1]
++ if !(t.(*types.Type).Alignment()%4 == 0) {
++ break
++ }
+ v.reset(OpPPC64MOVDstorezero)
+ v.AddArg(destptr)
+ v.AddArg(mem)
+ return true
+ }
+- // match: (Zero [12] destptr mem)
++ // match: (Zero [8] destptr mem)
+ // cond:
+- // result: (MOVWstorezero [8] destptr (MOVDstorezero [0] destptr mem))
++ // result: (MOVWstorezero [4] destptr (MOVWstorezero [0] destptr mem))
+ for {
+- if v.AuxInt != 12 {
++ if v.AuxInt != 8 {
+ break
+ }
+ _ = v.Args[1]
+ destptr := v.Args[0]
+ mem := v.Args[1]
+ v.reset(OpPPC64MOVWstorezero)
+- v.AuxInt = 8
++ v.AuxInt = 4
+ v.AddArg(destptr)
+- v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
++ v0 := b.NewValue0(v.Pos, OpPPC64MOVWstorezero, types.TypeMem)
+ v0.AuxInt = 0
+ v0.AddArg(destptr)
+ v0.AddArg(mem)
+@@ -12728,17 +12732,21 @@ func rewriteValuePPC64_OpZero_0(v *Value) bool {
+ func rewriteValuePPC64_OpZero_10(v *Value) bool {
+ b := v.Block
+ _ = b
+- // match: (Zero [16] destptr mem)
+- // cond:
+- // result: (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem))
++ // match: (Zero [12] {t} destptr mem)
++ // cond: t.(*types.Type).Alignment()%4 == 0
++ // result: (MOVWstorezero [8] destptr (MOVDstorezero [0] destptr mem))
+ for {
+- if v.AuxInt != 16 {
++ if v.AuxInt != 12 {
+ break
+ }
++ t := v.Aux
+ _ = v.Args[1]
+ destptr := v.Args[0]
+ mem := v.Args[1]
+- v.reset(OpPPC64MOVDstorezero)
++ if !(t.(*types.Type).Alignment()%4 == 0) {
++ break
++ }
++ v.reset(OpPPC64MOVWstorezero)
+ v.AuxInt = 8
+ v.AddArg(destptr)
+ v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
+@@ -12748,161 +12756,85 @@ func rewriteValuePPC64_OpZero_10(v *Value) bool {
+ v.AddArg(v0)
+ return true
+ }
+- // match: (Zero [24] destptr mem)
+- // cond:
+- // result: (MOVDstorezero [16] destptr (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem)))
++ // match: (Zero [16] {t} destptr mem)
++ // cond: t.(*types.Type).Alignment()%4 == 0
++ // result: (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem))
+ for {
+- if v.AuxInt != 24 {
++ if v.AuxInt != 16 {
+ break
+ }
++ t := v.Aux
+ _ = v.Args[1]
+ destptr := v.Args[0]
+ mem := v.Args[1]
+- v.reset(OpPPC64MOVDstorezero)
+- v.AuxInt = 16
+- v.AddArg(destptr)
+- v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
+- v0.AuxInt = 8
+- v0.AddArg(destptr)
+- v1 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
+- v1.AuxInt = 0
+- v1.AddArg(destptr)
+- v1.AddArg(mem)
+- v0.AddArg(v1)
+- v.AddArg(v0)
+- return true
+- }
+- // match: (Zero [32] destptr mem)
+- // cond:
+- // result: (MOVDstorezero [24] destptr (MOVDstorezero [16] destptr (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem))))
+- for {
+- if v.AuxInt != 32 {
++ if !(t.(*types.Type).Alignment()%4 == 0) {
+ break
+ }
+- _ = v.Args[1]
+- destptr := v.Args[0]
+- mem := v.Args[1]
+ v.reset(OpPPC64MOVDstorezero)
+- v.AuxInt = 24
++ v.AuxInt = 8
+ v.AddArg(destptr)
+ v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
+- v0.AuxInt = 16
++ v0.AuxInt = 0
+ v0.AddArg(destptr)
+- v1 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
+- v1.AuxInt = 8
+- v1.AddArg(destptr)
+- v2 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
+- v2.AuxInt = 0
+- v2.AddArg(destptr)
+- v2.AddArg(mem)
+- v1.AddArg(v2)
+- v0.AddArg(v1)
++ v0.AddArg(mem)
+ v.AddArg(v0)
+ return true
+ }
+- // match: (Zero [40] destptr mem)
+- // cond:
+- // result: (MOVDstorezero [32] destptr (MOVDstorezero [24] destptr (MOVDstorezero [16] destptr (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem)))))
++ // match: (Zero [24] {t} destptr mem)
++ // cond: t.(*types.Type).Alignment()%4 == 0
++ // result: (MOVDstorezero [16] destptr (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem)))
+ for {
+- if v.AuxInt != 40 {
++ if v.AuxInt != 24 {
+ break
+ }
++ t := v.Aux
+ _ = v.Args[1]
+ destptr := v.Args[0]
+ mem := v.Args[1]
+- v.reset(OpPPC64MOVDstorezero)
+- v.AuxInt = 32
+- v.AddArg(destptr)
+- v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
+- v0.AuxInt = 24
+- v0.AddArg(destptr)
+- v1 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
+- v1.AuxInt = 16
+- v1.AddArg(destptr)
+- v2 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
+- v2.AuxInt = 8
+- v2.AddArg(destptr)
+- v3 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
+- v3.AuxInt = 0
+- v3.AddArg(destptr)
+- v3.AddArg(mem)
+- v2.AddArg(v3)
+- v1.AddArg(v2)
+- v0.AddArg(v1)
+- v.AddArg(v0)
+- return true
+- }
+- // match: (Zero [48] destptr mem)
+- // cond:
+- // result: (MOVDstorezero [40] destptr (MOVDstorezero [32] destptr (MOVDstorezero [24] destptr (MOVDstorezero [16] destptr (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem))))))
+- for {
+- if v.AuxInt != 48 {
++ if !(t.(*types.Type).Alignment()%4 == 0) {
+ break
+ }
+- _ = v.Args[1]
+- destptr := v.Args[0]
+- mem := v.Args[1]
+ v.reset(OpPPC64MOVDstorezero)
+- v.AuxInt = 40
++ v.AuxInt = 16
+ v.AddArg(destptr)
+ v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
+- v0.AuxInt = 32
++ v0.AuxInt = 8
+ v0.AddArg(destptr)
+ v1 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
+- v1.AuxInt = 24
++ v1.AuxInt = 0
+ v1.AddArg(destptr)
+- v2 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
+- v2.AuxInt = 16
+- v2.AddArg(destptr)
+- v3 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
+- v3.AuxInt = 8
+- v3.AddArg(destptr)
+- v4 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
+- v4.AuxInt = 0
+- v4.AddArg(destptr)
+- v4.AddArg(mem)
+- v3.AddArg(v4)
+- v2.AddArg(v3)
+- v1.AddArg(v2)
++ v1.AddArg(mem)
+ v0.AddArg(v1)
+ v.AddArg(v0)
+ return true
+ }
+- // match: (Zero [56] destptr mem)
+- // cond:
+- // result: (MOVDstorezero [48] destptr (MOVDstorezero [40] destptr (MOVDstorezero [32] destptr (MOVDstorezero [24] destptr (MOVDstorezero [16] destptr (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem)))))))
++ // match: (Zero [32] {t} destptr mem)
++ // cond: t.(*types.Type).Alignment()%4 == 0
++ // result: (MOVDstorezero [24] destptr (MOVDstorezero [16] destptr (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem))))
+ for {
+- if v.AuxInt != 56 {
++ if v.AuxInt != 32 {
+ break
+ }
++ t := v.Aux
+ _ = v.Args[1]
+ destptr := v.Args[0]
+ mem := v.Args[1]
++ if !(t.(*types.Type).Alignment()%4 == 0) {
++ break
++ }
+ v.reset(OpPPC64MOVDstorezero)
+- v.AuxInt = 48
++ v.AuxInt = 24
+ v.AddArg(destptr)
+ v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
+- v0.AuxInt = 40
++ v0.AuxInt = 16
+ v0.AddArg(destptr)
+ v1 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
+- v1.AuxInt = 32
++ v1.AuxInt = 8
+ v1.AddArg(destptr)
+ v2 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
+- v2.AuxInt = 24
++ v2.AuxInt = 0
+ v2.AddArg(destptr)
+- v3 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
+- v3.AuxInt = 16
+- v3.AddArg(destptr)
+- v4 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
+- v4.AuxInt = 8
+- v4.AddArg(destptr)
+- v5 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
+- v5.AuxInt = 0
+- v5.AddArg(destptr)
+- v5.AddArg(mem)
+- v4.AddArg(v5)
+- v3.AddArg(v4)
+- v2.AddArg(v3)
++ v2.AddArg(mem)
+ v1.AddArg(v2)
+ v0.AddArg(v1)
+ v.AddArg(v0)
+--
+2.14.1
+