From 4ceef429cf4103641c228ed2549e88a60c9b2567 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Date: Thu, 14 Nov 2024 03:13:52 +0800
Subject: [PATCH] [CIR] [Lowering] [X86_64] Support VAArg for LongDouble (#1101)

This is a follow-up to https://github.com/llvm/clangir/pull/1100. After
that patch, using LongDouble with VAArg still runs into trouble due to
details of the X86_64 ABI, and this patch tries to address that.

The practical impact of the patch: together with
https://github.com/llvm/clangir/pull/1088 and a small follow-up fix, we
can build and run all of the C benchmarks in SPEC CPU 2017. I think it
is a milestone.
---
 clang/lib/CIR/Dialect/IR/CIRTypes.cpp         |   3 +-
 .../Transforms/TargetLowering/ABIInfoImpl.cpp |   7 ++
 .../Transforms/TargetLowering/ABIInfoImpl.h   |   1 +
 .../TargetLowering/CIRLowerContext.cpp        |  12 ++
 .../Targets/LoweringPrepareX86CXXABI.cpp      |   5 +-
 .../Transforms/TargetLowering/Targets/X86.cpp | 116 +++++++++++++++++-
 .../TargetLowering/Targets/X86_64ABIInfo.h    |   8 ++
 clang/test/CIR/Lowering/var-arg-x86_64.c      |  53 ++++++++
 8 files changed, 197 insertions(+), 8 deletions(-)

diff --git a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp
index e579fe4c2f0c..21584eeaefb9 100644
--- a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp
@@ -747,7 +747,7 @@ const llvm::fltSemantics &FP80Type::getFloatSemantics() const {
 llvm::TypeSize
 FP80Type::getTypeSizeInBits(const mlir::DataLayout &dataLayout,
                             mlir::DataLayoutEntryListRef params) const {
-  return llvm::TypeSize::getFixed(16);
+  return llvm::TypeSize::getFixed(128);
 }
 
 uint64_t FP80Type::getABIAlignment(const mlir::DataLayout &dataLayout,
@@ -768,6 +768,7 @@ const llvm::fltSemantics &FP128Type::getFloatSemantics() const {
 llvm::TypeSize
 FP128Type::getTypeSizeInBits(const mlir::DataLayout &dataLayout,
                              mlir::DataLayoutEntryListRef params) const {
+  // FIXME: We probably want it to return 128. But we're lacking a test now.
   return llvm::TypeSize::getFixed(16);
 }
 
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.cpp
index e07315d54a38..15b54b526aeb 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.cpp
@@ -65,4 +65,11 @@ CIRCXXABI::RecordArgABI getRecordArgABI(const StructType RT,
   return CXXABI.getRecordArgABI(RT);
 }
 
+CIRCXXABI::RecordArgABI getRecordArgABI(mlir::Type ty, CIRCXXABI &CXXABI) {
+  auto sTy = mlir::dyn_cast<StructType>(ty);
+  if (!sTy)
+    return CIRCXXABI::RAA_Default;
+  return getRecordArgABI(sTy, CXXABI);
+}
+
 } // namespace cir
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.h
index 8005b153a544..8088a333c4a5 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.h
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.h
@@ -33,6 +33,7 @@ mlir::Value emitRoundPointerUpToAlignment(cir::CIRBaseBuilderTy &builder,
 mlir::Type useFirstFieldIfTransparentUnion(mlir::Type Ty);
 
 CIRCXXABI::RecordArgABI getRecordArgABI(const StructType RT,
                                         CIRCXXABI &CXXABI);
+CIRCXXABI::RecordArgABI getRecordArgABI(mlir::Type ty, CIRCXXABI &CXXABI);
 
 } // namespace cir
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRLowerContext.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRLowerContext.cpp
index c6960d411b93..09013f113015 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRLowerContext.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRLowerContext.cpp
@@ -94,6 +94,18 @@ clang::TypeInfo CIRLowerContext::getTypeInfoImpl(const mlir::Type T) const {
       Align = Target->getDoubleAlign();
       break;
     }
+    if (auto longDoubleTy = mlir::dyn_cast<LongDoubleType>(T)) {
+      if (getLangOpts().OpenMP && getLangOpts().OpenMPIsTargetDevice &&
+          (Target->getLongDoubleWidth() != AuxTarget->getLongDoubleWidth() ||
+           Target->getLongDoubleAlign() != AuxTarget->getLongDoubleAlign())) {
+        Width = AuxTarget->getLongDoubleWidth();
+        Align = AuxTarget->getLongDoubleAlign();
+      } else {
+        Width = Target->getLongDoubleWidth();
+        Align = Target->getLongDoubleAlign();
+      }
+      break;
+    }
     cir_cconv_unreachable("Unknown builtin type!");
     break;
   }
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp
index ba376d26b0fc..d9b574fac4e7 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp
@@ -46,7 +46,6 @@ std::unique_ptr<LowerModule> getLowerModule(cir::VAArgOp op) {
   mlir::ModuleOp mo = op->getParentOfType<mlir::ModuleOp>();
   if (!mo)
     return nullptr;
-
   mlir::PatternRewriter rewriter(mo.getContext());
   return cir::createLowerModule(mo, rewriter);
 }
@@ -92,7 +91,7 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
   // Let's hope LLVM's va_arg instruction can take care of it.
   // Remove this when X86_64ABIInfo::classify can take care of every type.
   if (!mlir::isa<IntType, SingleType, DoubleType, BoolType,
-                 StructType>(op.getType()))
+                 StructType, LongDoubleType>(op.getType()))
     return nullptr;
 
   // Assume that va_list type is correct; should be pointer to LLVM type:
@@ -107,7 +106,6 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
   std::unique_ptr<LowerModule> lowerModule = getLowerModule(op);
   if (!lowerModule)
     return nullptr;
-
   mlir::Type ty = op.getType();
 
   // FIXME: How should we access the X86AVXABILevel?
@@ -167,7 +165,6 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
   mlir::Block *contBlock = currentBlock->splitBlock(op);
   mlir::Block *inRegBlock = builder.createBlock(contBlock);
   mlir::Block *inMemBlock = builder.createBlock(contBlock);
-
   builder.setInsertionPointToEnd(currentBlock);
   builder.create<cir::BrCondOp>(loc, inRegs, inRegBlock, inMemBlock);
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp
index 39bd1716aa3b..fc65d4053649 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp
@@ -165,6 +165,21 @@ void X86_64ABIInfo::classify(mlir::Type Ty, uint64_t OffsetBase, Class &Lo,
     Current = Class::SSE;
     return;
 
+  } else if (mlir::isa<LongDoubleType>(Ty)) {
+    const llvm::fltSemantics *LDF =
+        &getContext().getTargetInfo().getLongDoubleFormat();
+    if (LDF == &llvm::APFloat::IEEEquad()) {
+      Lo = Class::SSE;
+      Hi = Class::SSEUp;
+    } else if (LDF == &llvm::APFloat::x87DoubleExtended()) {
+      Lo = Class::X87;
+      Hi = Class::X87Up;
+    } else if (LDF == &llvm::APFloat::IEEEdouble()) {
+      Current = Class::SSE;
+    } else {
+      llvm_unreachable("unexpected long double representation!");
+    }
+    return;
   } else if (mlir::isa<BoolType>(Ty)) {
     Current = Class::Integer;
   } else if (const auto RT = mlir::dyn_cast<StructType>(Ty)) {
@@ -267,6 +282,65 @@ void X86_64ABIInfo::classify(mlir::Type Ty, uint64_t OffsetBase, Class &Lo,
   cir_cconv_unreachable("NYI");
 }
 
+ABIArgInfo X86_64ABIInfo::getIndirectResult(mlir::Type ty,
+                                            unsigned freeIntRegs) const {
+  // If this is a scalar LLVM value then assume LLVM will pass it in the right
+  // place naturally.
+  //
+  // This assumption is optimistic, as there could be free registers available
+  // when we need to pass this argument in memory, and LLVM could try to pass
+  // the argument in the free register. This does not seem to happen currently,
+  // but this code would be much safer if we could mark the argument with
+  // 'onstack'. See PR12193.
+  if (!isAggregateTypeForABI(ty) /* && IsIllegalVectorType(Ty) &&*/
+      /*!Ty->isBitIntType()*/) {
+    // FIXME: Handling enum type?
+
+    return (isPromotableIntegerTypeForABI(ty) ? ABIArgInfo::getExtend(ty)
+                                              : ABIArgInfo::getDirect());
+  }
+
+  if (CIRCXXABI::RecordArgABI RAA = getRecordArgABI(ty, getCXXABI()))
+    return getNaturalAlignIndirect(ty, RAA == CIRCXXABI::RAA_DirectInMemory);
+
+  // Compute the byval alignment. We specify the alignment of the byval in all
+  // cases so that the mid-level optimizer knows the alignment of the byval.
+  unsigned align = std::max(getContext().getTypeAlign(ty) / 8, 8U);
+
+  // Attempt to avoid passing indirect results using byval when possible. This
+  // is important for good codegen.
+  //
+  // We do this by coercing the value into a scalar type which the backend can
+  // handle naturally (i.e., without using byval).
+  //
+  // For simplicity, we currently only do this when we have exhausted all of the
+  // free integer registers. Doing this when there are free integer registers
+  // would require more care, as we would have to ensure that the coerced value
+  // did not claim the unused register. That would require either reordering the
+  // arguments to the function (so that any subsequent inreg values came first),
+  // or only doing this optimization when there were no following arguments that
+  // might be inreg.
+  //
+  // We currently expect it to be rare (particularly in well written code) for
+  // arguments to be passed on the stack when there are still free integer
+  // registers available (this would typically imply large structs being passed
+  // by value), so this seems like a fair tradeoff for now.
+  //
+  // We can revisit this if the backend grows support for 'onstack' parameter
+  // attributes. See PR12193.
+  if (freeIntRegs == 0) {
+    uint64_t size = getContext().getTypeSize(ty);
+
+    // If this type fits in an eightbyte, coerce it into the matching integral
+    // type, which will end up on the stack (with alignment 8).
+    if (align == 8 && size <= 64)
+      return ABIArgInfo::getDirect(
+          cir::IntType::get(LT.getMLIRContext(), size, false));
+  }
+
+  return ABIArgInfo::getIndirect(align);
+}
+
 /// Return a type that will be passed by the backend in the low 8 bytes of an
 /// XMM register, corresponding to the SSE class.
 mlir::Type X86_64ABIInfo::GetSSETypeAtOffset(mlir::Type IRType,
@@ -278,7 +352,7 @@ mlir::Type X86_64ABIInfo::GetSSETypeAtOffset(mlir::Type IRType,
       (unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset;
   mlir::Type T0 = getFPTypeAtOffset(IRType, IROffset, TD);
   if (!T0 || mlir::isa<cir::FP80Type>(T0))
-    return T0; // NOTE(cir): Not sure if this is correct.
+    return cir::DoubleType::get(LT.getMLIRContext());
 
   mlir::Type T1 = {};
   unsigned T0Size = TD.getTypeAllocSize(T0);
@@ -539,6 +613,22 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(
     ++neededSSE;
     break;
   }
+
+  // AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument
+  // on the stack.
+  case Class::Memory:
+
+  // AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or
+  // COMPLEX_X87, it is passed in memory.
+  case Class::X87:
+  case Class::ComplexX87:
+    if (getRecordArgABI(Ty, getCXXABI()) == CIRCXXABI::RAA_Indirect)
+      ++neededInt;
+    return getIndirectResult(Ty, freeIntRegs);
+
+  case Class::SSEUp:
+  case Class::X87Up:
+    llvm_unreachable("Invalid classification for lo word.");
 
   default:
     cir_cconv_assert_or_abort(
         !cir::MissingFeatures::X86ArgTypeClassification(), "NYI");
@@ -546,6 +636,11 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(
   }
 
   mlir::Type HighPart = {};
   switch (Hi) {
+  case Class::Memory:
+  case Class::X87:
+  case Class::ComplexX87:
+    llvm_unreachable("Invalid classification for hi word.");
+
   case Class::NoClass:
     break;
@@ -558,8 +653,23 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(
       return ABIArgInfo::getDirect(HighPart, 8);
     break;
 
-  default:
-    cir_cconv_unreachable("NYI");
+  // X87Up generally doesn't occur here (long double is passed in
+  // memory), except in situations involving unions.
+  case Class::X87Up:
+  case Class::SSE:
+    ++neededSSE;
+    HighPart = GetSSETypeAtOffset(Ty, 8, Ty, 8);
+
+    if (Lo == Class::NoClass) // Pass HighPart at offset 8 in memory.
+      return ABIArgInfo::getDirect(HighPart, 8);
+    break;
+
+  // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the
+  // eightbyte is passed in the upper half of the last used SSE
+  // register. This only happens when 128-bit vectors are passed.
+  case Class::SSEUp:
+    llvm_unreachable("NYI && We need to implement GetByteVectorType");
+    break;
   }
 
   // If a high part was specified, merge it together with the low part. It is
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h
index 201730519207..2ac4cdaa0309 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h
@@ -69,6 +69,14 @@ class X86_64ABIInfo : public cir::ABIInfo {
                                 mlir::Type SourceTy,
                                 unsigned SourceOffset) const;
 
+  /// Given a source type \arg ty, return a suitable result such that the
+  /// argument will be passed in memory.
+  ///
+  /// \param freeIntRegs - The number of free integer registers remaining
+  /// available.
+  ::cir::ABIArgInfo getIndirectResult(mlir::Type ty,
+                                      unsigned freeIntRegs) const;
+
   /// The 0.98 ABI revision clarified a lot of ambiguities,
   /// unfortunately in ways that were not always consistent with
   /// certain previous compilers. In particular, platforms which
diff --git a/clang/test/CIR/Lowering/var-arg-x86_64.c b/clang/test/CIR/Lowering/var-arg-x86_64.c
index 992d5e82cd98..a445ac657c74 100644
--- a/clang/test/CIR/Lowering/var-arg-x86_64.c
+++ b/clang/test/CIR/Lowering/var-arg-x86_64.c
@@ -1,3 +1,4 @@
+// REQUIRES: system-linux
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -fno-clangir-call-conv-lowering %s -o %t.cir
 // RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
@@ -76,3 +77,55 @@ double f1(int n, ...) {
 // CIR: [[CASTED_ARG_P:%.+]] = cir.cast(bitcast, [[ARG]]
 // CIR: [[CASTED_ARG:%.+]] = cir.load align(16) [[CASTED_ARG_P]]
 // CIR: store [[CASTED_ARG]], [[RES]]
+long double f2(int n, ...) {
+  va_list valist;
+  va_start(valist, n);
+  long double res = va_arg(valist, long double);
+  va_end(valist);
+  return res;
+}
+
+// CHECK: define {{.*}}@f2
+// CHECK: [[RESULT:%.+]] = alloca x86_fp80
+// CHECK: [[VA_LIST_ALLOCA:%.+]] = alloca {{.*}}[[VA_LIST_TYPE]]
+// CHECK: [[RES:%.+]] = alloca x86_fp80
+// CHECK: [[VA_LIST:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: call {{.*}}@llvm.va_start.p0(ptr [[VA_LIST]])
+// CHECK: [[VA_LIST2:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: [[OVERFLOW_AREA_P:%.+]] = getelementptr {{.*}} [[VA_LIST2]], i32 0, i32 2
+// CHECK: [[OVERFLOW_AREA:%.+]] = load ptr, ptr [[OVERFLOW_AREA_P]]
+// Ptr Mask Operations
+// CHECK: [[OVERFLOW_AREA_OFFSET_ALIGNED:%.+]] = getelementptr i8, ptr [[OVERFLOW_AREA]], i64 15
+// CHECK: [[OVERFLOW_AREA_OFFSET_ALIGNED_P:%.+]] = ptrtoint ptr [[OVERFLOW_AREA_OFFSET_ALIGNED]] to i32
+// CHECK: [[MASKED:%.+]] = and i32 [[OVERFLOW_AREA_OFFSET_ALIGNED_P]], -16
+// CHECK: [[DIFF:%.+]] = sub i32 [[OVERFLOW_AREA_OFFSET_ALIGNED_P]], [[MASKED]]
+// CHECK: [[PTR_MASKED:%.+]] = getelementptr i8, ptr [[OVERFLOW_AREA_OFFSET_ALIGNED]], i32 [[DIFF]]
+// CHECK: [[OVERFLOW_AREA_NEXT:%.+]] = getelementptr i8, ptr [[PTR_MASKED]], i64 16
+// CHECK: store ptr [[OVERFLOW_AREA_NEXT]], ptr [[OVERFLOW_AREA_P]]
+// CHECK: [[VALUE:%.+]] = load x86_fp80, ptr [[PTR_MASKED]]
+// CHECK: store x86_fp80 [[VALUE]], ptr [[RES]]
+// CHECK: [[VA_LIST2:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: call {{.*}}@llvm.va_end.p0(ptr [[VA_LIST2]])
+// CHECK: [[VALUE2:%.+]] = load x86_fp80, ptr [[RES]]
+// CHECK: store x86_fp80 [[VALUE2]], ptr [[RESULT]]
+// CHECK: [[RETURN_VALUE:%.+]] = load x86_fp80, ptr [[RESULT]]
+// CHECK: ret x86_fp80 [[RETURN_VALUE]]
+
+// CIR: cir.func @f2
+// CIR: [[VA_LIST_ALLOCA:%.+]] = cir.alloca !cir.array<!ty___va_list_tag x 1>, !cir.ptr<!cir.array<!ty___va_list_tag x 1>>, ["valist"]
+// CIR: [[RES:%.+]] = cir.alloca !cir.long_double<!cir.f80>, !cir.ptr<!cir.long_double<!cir.f80>>, ["res"
+// CIR: [[VASTED_VA_LIST:%.+]] = cir.cast(array_to_ptrdecay, [[VA_LIST_ALLOCA]]
+// CIR: cir.va.start [[VASTED_VA_LIST]]
+// CIR: [[VASTED_VA_LIST:%.+]] = cir.cast(array_to_ptrdecay, [[VA_LIST_ALLOCA]]
+// CIR: [[OVERFLOW_AREA_P:%.+]] = cir.get_member [[VASTED_VA_LIST]][2] {name = "overflow_arg_area"}
+// CIR: [[OVERFLOW_AREA:%.+]] = cir.load [[OVERFLOW_AREA_P]]
+// CIR: [[CASTED:%.+]] = cir.cast(bitcast, [[OVERFLOW_AREA]] : !cir.ptr<!void>)
+// CIR: [[CONSTANT:%.+]] = cir.const #cir.int<15>
+// CIR: [[PTR_STRIDE:%.+]] = cir.ptr_stride([[CASTED]] {{.*}}[[CONSTANT]]
+// CIR: [[MINUS_ALIGN:%.+]] = cir.const #cir.int<-16>
+// CIR: [[ALIGNED:%.+]] = cir.ptr_mask([[PTR_STRIDE]], [[MINUS_ALIGN]]
+// CIR: [[ALIGN:%.+]] = cir.const #cir.int<16>
+// CIR: [[CAST_ALIGNED:%.+]] = cir.cast(bitcast, [[ALIGNED]] : !cir.ptr<!u8i>), !cir.ptr<!cir.long_double<!cir.f80>>
+// CIR: [[CAST_ALIGNED_VALUE:%.+]] = cir.load [[CAST_ALIGNED]]
+// CIR: cir.store [[CAST_ALIGNED_VALUE]], [[RES]]
+// CIR: cir.va.end
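
For reference, here is a minimal standalone program exercising the path this
patch enables. This is a sketch, not part of the patch: it assumes a
ClangIR-enabled clang whose driver accepts -fclangir (the test above uses the
equivalent cc1 flags), and the helper name sum_ld is purely illustrative.

    #include <stdarg.h>
    #include <stdio.h>

    /* Sums n long double varargs. On x86_64 Linux, long double is the
     * 80-bit x87 format, so classify() above marks it X87/X87Up and the
     * argument is passed in memory (AMD64 ABI 3.2.3p3, Rule 5) -- exactly
     * the va_arg path this patch teaches the lowering to handle. */
    static long double sum_ld(int n, ...) {
      va_list ap;
      va_start(ap, n);
      long double total = 0.0L;
      for (int i = 0; i < n; ++i)
        total += va_arg(ap, long double);
      va_end(ap);
      return total;
    }

    int main(void) {
      printf("%Lf\n", sum_ld(3, 1.0L, 2.0L, 3.5L)); /* prints 6.500000 */
      return 0;
    }

Built with something like `clang -fclangir repro.c` for
x86_64-unknown-linux-gnu, this previously hit the unhandled VAArg lowering
path; with this patch it compiles and runs.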