Skip to content

Commit 5f53e85

Browse files
committed
[SLP]Fix a crash when trying to find reduced ops for the reduced value.
The lookup needs to use the original reduced value rather than the one the compiler gets after reduction, because the latter may already have been replaced by an extractelement instruction.
1 parent 3c2b185 commit 5f53e85

2 files changed

Lines changed: 59 additions & 1 deletion

File tree

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13043,7 +13043,7 @@ class HorizontalReduction {
1304313043
Value *OrigV = TrackedToOrig.find(RdxVal)->second;
1304413044
unsigned NumOps =
1304513045
VectorizedVals.lookup(RdxVal) + SameValuesCounter[OrigV];
13046-
if (NumOps != ReducedValsToOps.find(RdxVal)->second.size())
13046+
if (NumOps != ReducedValsToOps.find(OrigV)->second.size())
1304713047
LocalExternallyUsedValues[RdxVal];
1304813048
}
1304913049
// Do not need the list of reused scalars in regular mode anymore.
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s
3+
4+
define i32 @foo() {
5+
; CHECK-LABEL: @foo(
6+
; CHECK-NEXT: bb:
7+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i32> zeroinitializer, i32 0
8+
; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> zeroinitializer, zeroinitializer
9+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
10+
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], zeroinitializer
11+
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], zeroinitializer
12+
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP4]])
13+
; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 0, [[TMP5]]
14+
; CHECK-NEXT: [[OP_RDX1:%.*]] = mul i32 [[OP_RDX]], 0
15+
; CHECK-NEXT: [[OP_RDX2:%.*]] = mul i32 [[TMP0]], [[TMP0]]
16+
; CHECK-NEXT: [[OP_RDX3:%.*]] = mul i32 [[TMP0]], [[TMP0]]
17+
; CHECK-NEXT: [[OP_RDX4:%.*]] = mul i32 [[OP_RDX1]], [[OP_RDX2]]
18+
; CHECK-NEXT: [[OP_RDX5:%.*]] = mul i32 [[OP_RDX3]], [[TMP2]]
19+
; CHECK-NEXT: [[OP_RDX6:%.*]] = mul i32 [[OP_RDX4]], [[OP_RDX5]]
20+
; CHECK-NEXT: ret i32 [[OP_RDX6]]
21+
;
22+
bb:
23+
%inst5 = add i32 0, 0
24+
%0 = extractelement <2 x i32> zeroinitializer, i32 0
25+
%inst7 = mul i32 %0, %inst5
26+
%1 = extractelement <2 x i32> zeroinitializer, i32 0
27+
%inst13 = mul i32 %1, %inst7
28+
%inst14 = mul i32 %inst13, 0
29+
%2 = extractelement <2 x i32> zeroinitializer, i32 0
30+
%inst19 = mul i32 %2, %inst14
31+
%inst20 = mul i32 %inst19, 0
32+
%3 = extractelement <2 x i32> zeroinitializer, i32 0
33+
%inst26 = mul i32 %3, %inst20
34+
%inst27 = mul i32 %inst26, 0
35+
%4 = or <4 x i32> zeroinitializer, zeroinitializer
36+
%5 = extractelement <4 x i32> %4, i32 0
37+
%inst31 = mul i32 %5, 0
38+
%inst32 = add i32 %inst31, 0
39+
%inst33 = mul i32 %5, %inst27
40+
%inst34 = mul i32 %inst33, %inst32
41+
%6 = extractelement <4 x i32> %4, i32 1
42+
%inst39 = mul i32 %6, 0
43+
%inst40 = add i32 %inst39, 0
44+
%inst41 = mul i32 0, %inst34
45+
%inst42 = mul i32 %inst41, %inst40
46+
%7 = extractelement <4 x i32> %4, i32 2
47+
%inst47 = mul i32 %7, 0
48+
%inst48 = add i32 %inst47, 0
49+
%inst49 = mul i32 0, %inst42
50+
%inst50 = mul i32 %inst49, %inst48
51+
%8 = extractelement <4 x i32> %4, i32 3
52+
%inst55 = mul i32 %8, 0
53+
%inst56 = add i32 %inst55, 0
54+
%inst57 = mul i32 0, %inst50
55+
%inst58 = mul i32 %inst57, %inst56
56+
ret i32 %inst58
57+
}
58+

0 commit comments

Comments
 (0)