Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions docs/DXIL.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1758,6 +1758,15 @@ The following signature shows the operation syntax::

The call respects SM5.1 OOB and alignment rules.

The ``alignment`` parameter specifies the **absolute alignment** of the
effective address (``base address + index``). For regular ``Load`` operations,
this defaults to 4 bytes for raw buffers. For templated ``Load`` operations,
this defaults to the size of the largest scalar component contained in the
aggregate template parameter type. The HLSL
``AlignedLoad<T>(offset, alignment)`` intrinsic allows applications to specify
custom alignment values when they can guarantee higher alignment, enabling
backend compiler optimizations.

==================== =====================================================
Valid resource type # of active coordinates
==================== =====================================================
Expand Down Expand Up @@ -1816,6 +1825,15 @@ The call respects SM5.1 OOB and alignment rules.

The write mask indicates which components are written (x - 1, y - 2, z - 4, w - 8), similar to DXBC. For RWTypedBuffer, the mask must cover all resource components. For RWRawBuffer and RWStructuredBuffer, valid masks are: x, xy, xyz, xyzw.

The ``alignment`` parameter specifies the **absolute alignment** of the
effective address (``base address + index``). For regular ``Store`` operations,
this defaults to 4 bytes for raw buffers. For templated ``Store`` operations,
this defaults to the size of the largest scalar component contained in the
aggregate template parameter type. The HLSL
``AlignedStore<T>(offset, alignment, value)`` intrinsic allows applications to
specify custom alignment values when they can guarantee higher alignment,
enabling backend compiler optimizations.

==================== =====================================================
Valid resource type # of active coordinates
==================== =====================================================
Expand Down
9 changes: 9 additions & 0 deletions docs/SPIR-V.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2768,6 +2768,15 @@ used to store a 32-bit unsigned integer. For ``Store2``, ``Store3``, and ``Store
done 2, 3, and 4 times, respectively. Each time the word offset is incremented by 1 before
performing ``OpAccessChain``.

``.AlignedLoad<T>()``, ``.AlignedStore<T>()``
++++++++++++++++++++++++++++++++++++++++++++++
These functions work identically to their non-aligned counterparts (``Load`` and ``Store``),
but accept an additional ``alignment`` parameter that specifies the guaranteed alignment of
the effective address. The alignment value is passed to SPIR-V load/store operations via
memory operands (``Aligned`` memory access qualifier) to enable backend optimizations.
The alignment parameter must be a compile-time constant power-of-two value that is greater
than or equal to the size of the largest scalar component of the template type ``T`` and
less than or equal to 4096 bytes.

``.Interlocked*()``
+++++++++++++++++++

Expand Down
4 changes: 3 additions & 1 deletion include/dxc/HlslIntrinsicOp.h
Original file line number Diff line number Diff line change
Expand Up @@ -270,9 +270,11 @@ enum class IntrinsicOp {
MOP_GatherRaw = 250,
MOP_GatherRed = 251,
MOP_GetSamplePosition = 252,
MOP_AlignedLoad = 405,
MOP_Load2 = 253,
MOP_Load3 = 254,
MOP_Load4 = 255,
MOP_AlignedStore = 406,
MOP_InterlockedAdd = 256,
MOP_InterlockedAdd64 = 257,
MOP_InterlockedAnd = 258,
Expand Down Expand Up @@ -411,7 +413,7 @@ enum class IntrinsicOp {
IOP_usign = 355,
MOP_InterlockedUMax = 356,
MOP_InterlockedUMin = 357,
Num_Intrinsics = 405,
Num_Intrinsics = 407,
};
inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp opcode) {
switch (opcode) {
Expand Down
95 changes: 79 additions & 16 deletions lib/HLSL/HLOperationLower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4100,7 +4100,8 @@ struct ResLoadHelper {
ResLoadHelper(Instruction *Inst, DxilResource::Kind RK, Value *h, Value *idx,
Value *Offset, Value *status = nullptr, Value *mip = nullptr)
: intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(Inst),
addr(idx), offset(Offset), status(status), mipLevel(mip) {
addr(idx), offset(Offset), status(status), mipLevel(mip),
customAlignment(0) {
opcode = LoadOpFromResKind(RK);
Type *Ty = Inst->getType();
if (opcode == OP::OpCode::RawBufferLoad && Ty->isVectorTy() &&
Expand All @@ -4118,6 +4119,8 @@ struct ResLoadHelper {
Value *offset;
Value *status;
Value *mipLevel;
unsigned
customAlignment; // For AlignedLoad/AlignedStore - 0 means use default
};

// Uses CI arguments to determine the index, offset, and mipLevel also depending
Expand All @@ -4129,7 +4132,8 @@ struct ResLoadHelper {
ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK,
DxilResourceBase::Class RC, Value *hdl,
IntrinsicOp IOP, LoadInst *TyBufSubLoad)
: intrinsicOpCode(IOP), handle(hdl), offset(nullptr), status(nullptr) {
: intrinsicOpCode(IOP), handle(hdl), offset(nullptr), status(nullptr),
customAlignment(0) {
opcode = LoadOpFromResKind(RK);
bool bForSubscript = false;
if (TyBufSubLoad) {
Expand All @@ -4144,6 +4148,26 @@ ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK,
unsigned StatusIdx = HLOperandIndex::kBufLoadStatusOpIdx;
unsigned OffsetIdx = HLOperandIndex::kInvalidIdx;

// Extract alignment for AlignedLoad operations
// AlignedLoad CallInst has: (opcode, handle, addr, alignment [, status])
// Regular Load has: (opcode, handle, addr [, status])
if (IOP == IntrinsicOp::MOP_AlignedLoad) {
// alignment is at index 3 (after opcode, handle, addr)
const unsigned kAlignmentIdx = kAddrIdx + 1;
if (argc > kAlignmentIdx) {
if (ConstantInt *AlignConst =
dyn_cast<ConstantInt>(CI->getArgOperand(kAlignmentIdx))) {
customAlignment = AlignConst->getZExtValue();
}
}
// Status is at index 4 for AlignedLoad (if present)
if (argc > kAlignmentIdx + 1) {
StatusIdx = kAlignmentIdx + 1;
} else {
StatusIdx = HLOperandIndex::kInvalidIdx;
}
}

if (opcode == OP::OpCode::TextureLoad) {
bool IsMS = (RK == DxilResource::Kind::Texture2DMS ||
RK == DxilResource::Kind::Texture2DMSArray);
Expand Down Expand Up @@ -4191,15 +4215,15 @@ ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK,
// Structured buffers receive no exterior offset in this constructor,
// but may need to increment it later.
offset = ConstantInt::get(i32Ty, 0U);
else if (argc > OffsetIdx)
else if (argc > OffsetIdx && OffsetIdx != HLOperandIndex::kInvalidIdx)
// Textures may set the offset from an explicit argument.
offset = CI->getArgOperand(OffsetIdx);
else
// All other cases use undef.
offset = UndefValue::get(i32Ty);

// Retrieve status value if provided.
if (argc > StatusIdx)
if (StatusIdx != HLOperandIndex::kInvalidIdx && argc > StatusIdx)
status = CI->getArgOperand(StatusIdx);
}

Expand Down Expand Up @@ -4246,8 +4270,15 @@ static SmallVector<Value *, 10> GetBufLoadArgs(ResLoadHelper helper,
OP::OpCode opcode = helper.opcode;
llvm::Constant *opArg = Builder.getInt32((uint32_t)opcode);

unsigned alignment = RK == DxilResource::Kind::RawBuffer ? 4U : 8U;
alignment = std::min(alignment, LdSize);
// Use custom alignment if provided (for AlignedLoad), otherwise calculate
// default
unsigned alignment;
if (helper.customAlignment != 0) {
alignment = helper.customAlignment;
} else {
alignment = RK == DxilResource::Kind::RawBuffer ? 4U : 8U;
alignment = std::min(alignment, LdSize);
}
Constant *alignmentVal = Builder.getInt32(alignment);

// Assemble args specific to the type bab/struct/typed:
Expand Down Expand Up @@ -4516,7 +4547,8 @@ void Split64bitValForStore(Type *EltTy, ArrayRef<Value *> vals, unsigned size,

void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
Value *Idx, Value *offset, IRBuilder<> &Builder,
hlsl::OP *OP, Value *sampIdx = nullptr) {
hlsl::OP *OP, Value *sampIdx = nullptr,
unsigned customAlignment = 0) {
Type *Ty = val->getType();
OP::OpCode opcode = OP::OpCode::NumOpCodes;
bool IsTyped = true;
Expand Down Expand Up @@ -4560,11 +4592,18 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
val = Builder.CreateZExt(val, Ty);
}

// If RawBuffer store of 64-bit value, don't set alignment to 8,
// since buffer alignment isn't known to be anything over 4.
unsigned alignValue = OP->GetAllocSizeForType(EltTy);
if (RK == HLResource::Kind::RawBuffer && alignValue > 4)
alignValue = 4;
// Use custom alignment if provided (for AlignedStore), otherwise calculate
// default
unsigned alignValue;
if (customAlignment != 0) {
alignValue = customAlignment;
} else {
// If RawBuffer store of 64-bit value, don't set alignment to 8,
// since buffer alignment isn't known to be anything over 4.
alignValue = OP->GetAllocSizeForType(EltTy);
if (RK == HLResource::Kind::RawBuffer && alignValue > 4)
alignValue = 4;
}
Constant *Alignment = OP->GetI32Const(alignValue);
bool is64 = EltTy == i64Ty || EltTy == doubleTy;
if (is64 && IsTyped) {
Expand Down Expand Up @@ -4758,10 +4797,30 @@ Value *TranslateResourceStore(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
IRBuilder<> Builder(CI);
DXIL::ResourceKind RK = pObjHelper->GetRK(handle);

Value *val = CI->getArgOperand(HLOperandIndex::kStoreValOpIdx);
Value *offset = CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx);
// Extract custom alignment for AlignedStore
unsigned customAlignment = 0;
unsigned valueArgIdx = HLOperandIndex::kStoreValOpIdx;
unsigned offsetArgIdx = HLOperandIndex::kStoreOffsetOpIdx;

if (IOP == IntrinsicOp::MOP_AlignedStore) {
// AlignedStore CallInst has: (opcode, handle, offset, alignment, value)
// Regular Store has: (opcode, handle, offset, value)
const unsigned kAlignmentIdx = HLOperandIndex::kStoreOffsetOpIdx + 1; // = 3
if (CI->getNumArgOperands() > kAlignmentIdx) {
if (ConstantInt *AlignConst =
dyn_cast<ConstantInt>(CI->getArgOperand(kAlignmentIdx))) {
customAlignment = AlignConst->getZExtValue();
}
}
valueArgIdx =
kAlignmentIdx + 1; // Value is after alignment for AlignedStore
}

Value *val = CI->getArgOperand(valueArgIdx);
Value *offset = CI->getArgOperand(offsetArgIdx);
Value *UndefI = UndefValue::get(Builder.getInt32Ty());
TranslateStore(RK, handle, val, offset, UndefI, Builder, hlslOP);
TranslateStore(RK, handle, val, offset, UndefI, Builder, hlslOP, nullptr,
customAlignment);

return nullptr;
}
Expand Down Expand Up @@ -7514,7 +7573,6 @@ constexpr IntrinsicLower gLowerTable[] = {
DXIL::OpCode::VectorAccumulate},

{IntrinsicOp::IOP_isnormal, TrivialIsSpecialFloat, DXIL::OpCode::IsNormal},

{IntrinsicOp::IOP_GetGroupWaveCount, EmptyLower,
DXIL::OpCode::GetGroupWaveCount},
{IntrinsicOp::IOP_GetGroupWaveIndex, EmptyLower,
Expand All @@ -7536,6 +7594,11 @@ constexpr IntrinsicLower gLowerTable[] = {
DXIL::OpCode::RayQuery_CommittedTriangleObjectPosition},
{IntrinsicOp::MOP_DxHitObject_TriangleObjectPosition, EmptyLower,
DXIL::OpCode::HitObject_TriangleObjectPosition},

{IntrinsicOp::MOP_AlignedLoad, TranslateResourceLoad,
DXIL::OpCode::NumOpCodes},
{IntrinsicOp::MOP_AlignedStore, TranslateResourceStore,
DXIL::OpCode::NumOpCodes},
};
constexpr size_t NumLowerTableEntries =
sizeof(gLowerTable) / sizeof(gLowerTable[0]);
Expand Down
9 changes: 9 additions & 0 deletions tools/clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -7606,6 +7606,15 @@ def err_hlsl_unsupported_buffer_packoffset : Error<
"packoffset is only allowed within a constant buffer, not on the constant buffer declaration">;
def err_hlsl_unsupported_buffer_slot_target_specific : Error<
"user defined constant buffer slots cannot be target specific">;
def err_hlsl_aligned_buffer_unsupported_type : Error<
"AlignedLoad/AlignedStore functions cannot be used with %0. "
"Supported types are ByteAddressBuffer and RWByteAddressBuffer">;
def err_hlsl_aligned_buffer_invalid_alignment : Error<
"Alignment values require compile-time constant power-of-two values "
"that are >= largest scalar type size and <= 4096">;
def err_hlsl_aligned_buffer_alignment_too_small : Error<
"Alignment parameter of %0 bytes must be >= the largest scalar type size "
"%1 bytes for %2 element type">;
def err_hlsl_unsupported_typedbuffer_template_parameter : Error<
"elements of typed buffers and textures must be scalars or vectors">;
def err_hlsl_unsupported_typedbuffer_template_parameter_size : Error<
Expand Down
Loading