rustc_codegen_llvm/va_arg.rs

use std::assert_matches::assert_matches;

use rustc_abi::{Align, BackendRepr, CVariadicStatus, Endian, HasDataLayout, Primitive, Size};
use rustc_codegen_ssa::MemFlags;
use rustc_codegen_ssa::common::IntPredicate;
use rustc_codegen_ssa::mir::operand::OperandRef;
use rustc_codegen_ssa::traits::{
    BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods, LayoutTypeCodegenMethods,
};
use rustc_middle::bug;
use rustc_middle::ty::Ty;
use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf, TyAndLayout};
use rustc_target::spec::{Arch, Env, LlvmAbi, RustcAbi};

use crate::builder::Builder;
use crate::llvm::Value;
use crate::type_of::LayoutLlvmExt;

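/// Rounds an integer offset `value` up to the next multiple of `align` using the classic
/// two's-complement trick `(value + align - 1) & -align`. For example, rounding 13 up to
/// an alignment of 8:
///
/// ```text
/// (13 + 7) & -8  ==  20 & !0b111  ==  16
/// ```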
fn round_up_to_alignment<'ll>(
    bx: &mut Builder<'_, 'll, '_>,
    mut value: &'ll Value,
    align: Align,
) -> &'ll Value {
    value = bx.add(value, bx.cx().const_i32(align.bytes() as i32 - 1));
    return bx.and(value, bx.cx().const_i32(-(align.bytes() as i32)));
}
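/// Rounds the pointer `addr` up to `align` by adding `align - 1` and then masking off the
/// low bits with `llvm.ptrmask`, which avoids a provenance-losing pointer/integer round-trip.
/// The mask is `-align` truncated to the target's pointer width, e.g. `0xFFFF_FFF8` for an
/// 8-byte alignment on a 32-bit target.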
fn round_pointer_up_to_alignment<'ll>(
    bx: &mut Builder<'_, 'll, '_>,
    addr: &'ll Value,
    align: Align,
) -> &'ll Value {
    let ptr = bx.inbounds_ptradd(addr, bx.const_i32(align.bytes() as i32 - 1));
    let pointer_width = bx.tcx().sess.target.pointer_width;
    let mask = align.bytes().wrapping_neg() & (u64::MAX >> (64 - pointer_width));
    bx.call_intrinsic(
        "llvm.ptrmask",
        &[bx.type_ptr(), bx.type_isize()],
        &[ptr, bx.const_usize(mask)],
    )
}
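/// Implements the common "bump a pointer through the argument save area" `va_list` scheme:
/// load the current argument pointer from the `va_list`, round it up if the type needs (and
/// the ABI honors) more than the slot alignment, write back the pointer advanced by the
/// slot-aligned size, and return the address (and alignment) to load the value from.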
fn emit_direct_ptr_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    size: Size,
    align: Align,
    slot_size: Align,
    allow_higher_align: bool,
    force_right_adjust: bool,
) -> (&'ll Value, Align) {
    let va_list_ty = bx.type_ptr();
    let va_list_addr = list.immediate();

    let ptr_align_abi = bx.tcx().data_layout.pointer_align().abi;
    let ptr = bx.load(va_list_ty, va_list_addr, ptr_align_abi);

    let (addr, addr_align) = if allow_higher_align && align > slot_size {
        (round_pointer_up_to_alignment(bx, ptr, align), align)
    } else {
        (ptr, slot_size)
    };

    let aligned_size = size.align_to(slot_size).bytes() as i32;
    let full_direct_size = bx.cx().const_i32(aligned_size);
    let next = bx.inbounds_ptradd(addr, full_direct_size);
    bx.store(next, va_list_addr, ptr_align_abi);

    if size.bytes() < slot_size.bytes()
        && bx.tcx().sess.target.endian == Endian::Big
        && force_right_adjust
    {
        let adjusted_size = bx.cx().const_i32((slot_size.bytes() - size.bytes()) as i32);
        let adjusted = bx.inbounds_ptradd(addr, adjusted_size);
        // We're in the middle of a slot now, so use the type's alignment, not the slot's.
        (adjusted, align)
    } else {
        (addr, addr_align)
    }
}

enum PassMode {
    Direct,
    Indirect,
}

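/// The stack slot granule of the target's variadic argument area; argument sizes are rounded
/// up to a multiple of this when advancing the `va_list` pointer.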
enum SlotSize {
    Bytes8 = 8,
    Bytes4 = 4,
    Bytes1 = 1,
}

/// Whether to respect a value alignment that is higher than the slot alignment.
///
/// When `No` the argument is in the next slot, when `Yes` there will be empty slots
/// until a slot's starting address has the required alignment.
enum AllowHigherAlign {
    No,
    Yes,
}

/// Determines where in the slot the value is located. Only takes effect on big-endian targets.
///
/// With 8-byte slots, a 32-bit integer is either stored right-adjusted:
///
/// ```text
/// [0x0, 0x0, 0x0, 0x0, 0xaa, 0xaa, 0xaa, 0xaa]
/// ```
///
/// or left-adjusted:
///
/// ```text
/// [0xaa, 0xaa, 0xaa, 0xaa, 0x0, 0x0, 0x0, 0x0]
/// ```
///
/// Most big-endian targets store values right-adjusted.
enum ForceRightAdjust {
    No,
    Yes,
}
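/// Lowers `va_arg` for targets whose `va_list` is a single pointer into the argument area.
/// With `PassMode::Indirect`, the slot holds a pointer to the value rather than the value
/// itself, so one extra load is needed to reach the actual argument.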
fn emit_ptr_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
    pass_mode: PassMode,
    slot_size: SlotSize,
    allow_higher_align: AllowHigherAlign,
    force_right_adjust: ForceRightAdjust,
) -> &'ll Value {
    let indirect = matches!(pass_mode, PassMode::Indirect);
    let allow_higher_align = matches!(allow_higher_align, AllowHigherAlign::Yes);
    let force_right_adjust = matches!(force_right_adjust, ForceRightAdjust::Yes);
    let slot_size = Align::from_bytes(slot_size as u64).unwrap();

    let layout = bx.cx.layout_of(target_ty);
    let (llty, size, align) = if indirect {
        (
            bx.cx.layout_of(Ty::new_imm_ptr(bx.cx.tcx, target_ty)).llvm_type(bx.cx),
            bx.cx.data_layout().pointer_size(),
            bx.cx.data_layout().pointer_align(),
        )
    } else {
        (layout.llvm_type(bx.cx), layout.size, layout.align)
    };
    let (addr, addr_align) = emit_direct_ptr_va_arg(
        bx,
        list,
        size,
        align.abi,
        slot_size,
        allow_higher_align,
        force_right_adjust,
    );
    if indirect {
        let tmp_ret = bx.load(llty, addr, addr_align);
        bx.load(layout.llvm_type(bx.cx), tmp_ret, align.abi)
    } else {
        bx.load(llty, addr, addr_align)
    }
}

fn emit_aapcs_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    // Implementation of the AAPCS64 calling convention for va_args, see
    // https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst
    //
    // typedef struct va_list {
    //     void * stack; // next stack param
    //     void * gr_top; // end of GP arg reg save area
    //     void * vr_top; // end of FP/SIMD arg reg save area
    //     int gr_offs; // offset from gr_top to next GP register arg
    //     int vr_offs; // offset from vr_top to next FP/SIMD register arg
    // } va_list;
    let va_list_addr = list.immediate();

    // There is no padding between fields since `void*` is size=8 align=8, `int` is size=4 align=4.
    // See https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst
    // Table 1, Byte size and byte alignment of fundamental data types
    // Table 3, Mapping of C & C++ built-in data types
    let ptr_offset = 8;
    let i32_offset = 4;
    let gr_top = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(ptr_offset));
    let vr_top = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * ptr_offset));
    let gr_offs = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(3 * ptr_offset));
    let vr_offs = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(3 * ptr_offset + i32_offset));
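    // I.e. the five fields live at byte offsets 0 (stack), 8 (gr_top), 16 (vr_top),
    // 24 (gr_offs), and 28 (vr_offs).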

    let layout = bx.cx.layout_of(target_ty);

    let maybe_reg = bx.append_sibling_block("va_arg.maybe_reg");
    let in_reg = bx.append_sibling_block("va_arg.in_reg");
    let on_stack = bx.append_sibling_block("va_arg.on_stack");
    let end = bx.append_sibling_block("va_arg.end");
    let zero = bx.const_i32(0);
    let offset_align = Align::from_bytes(4).unwrap();

    let gr_type = target_ty.is_any_ptr() || target_ty.is_integral();
    let (reg_off, reg_top, slot_size) = if gr_type {
        let nreg = layout.size.bytes().div_ceil(8);
        (gr_offs, gr_top, nreg * 8)
    } else {
        let nreg = layout.size.bytes().div_ceil(16);
        (vr_offs, vr_top, nreg * 16)
    };
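    // AAPCS64 general-purpose registers are 8 bytes wide and SIMD/FP registers 16 bytes wide,
    // so an argument consumes a multiple of 8 or 16 bytes of the respective save area.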

    // if the offset >= 0 then the value will be on the stack
    let mut reg_off_v = bx.load(bx.type_i32(), reg_off, offset_align);
    let use_stack = bx.icmp(IntPredicate::IntSGE, reg_off_v, zero);
    bx.cond_br(use_stack, on_stack, maybe_reg);

    // The value at this point might be in a register, but there is a chance that
    // it could be on the stack so we have to update the offset and then check
    // the offset again.

    bx.switch_to_block(maybe_reg);
    if gr_type && layout.align.bytes() > 8 {
        reg_off_v = bx.add(reg_off_v, bx.const_i32(15));
        reg_off_v = bx.and(reg_off_v, bx.const_i32(-16));
    }
    let new_reg_off_v = bx.add(reg_off_v, bx.const_i32(slot_size as i32));

    bx.store(new_reg_off_v, reg_off, offset_align);

    // Check to see if we have overflowed the registers as a result of this.
    // If we have then we need to use the stack for this value
    let use_stack = bx.icmp(IntPredicate::IntSGT, new_reg_off_v, zero);
    bx.cond_br(use_stack, on_stack, in_reg);

    bx.switch_to_block(in_reg);
    let top_type = bx.type_ptr();
    let top = bx.load(top_type, reg_top, dl.pointer_align().abi);

    // reg_value = *(@top + reg_off_v);
    let mut reg_addr = bx.ptradd(top, reg_off_v);
    if bx.tcx().sess.target.endian == Endian::Big && layout.size.bytes() != slot_size {
        // On big-endian systems the value is right-aligned in its slot.
        let offset = bx.const_i32((slot_size - layout.size.bytes()) as i32);
        reg_addr = bx.ptradd(reg_addr, offset);
    }
    let reg_type = layout.llvm_type(bx);
    let reg_value = bx.load(reg_type, reg_addr, layout.align.abi);
    bx.br(end);

    // On Stack block
    bx.switch_to_block(on_stack);
    let stack_value = emit_ptr_va_arg(
        bx,
        list,
        target_ty,
        PassMode::Direct,
        SlotSize::Bytes8,
        AllowHigherAlign::Yes,
        ForceRightAdjust::No,
    );
    bx.br(end);

    bx.switch_to_block(end);
    let val =
        bx.phi(layout.immediate_llvm_type(bx), &[reg_value, stack_value], &[in_reg, on_stack]);

    val
}

fn emit_powerpc_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    // struct __va_list_tag {
    //   unsigned char gpr;
    //   unsigned char fpr;
    //   unsigned short reserved;
    //   void *overflow_arg_area;
    //   void *reg_save_area;
    // };
    let va_list_addr = list.immediate();

    // Peel off any newtype wrappers.
    let layout = {
        let mut layout = bx.cx.layout_of(target_ty);

        while let Some((_, inner)) = layout.non_1zst_field(bx.cx) {
            layout = inner;
        }

        layout
    };

    // Rust does not currently support any powerpc softfloat targets.
    let target = &bx.cx.tcx.sess.target;
    let is_soft_float_abi = target.rustc_abi == Some(RustcAbi::Softfloat);
    assert!(!is_soft_float_abi);

    // All instances of VaArgSafe are passed directly.
    let is_indirect = false;

    let (is_i64, is_int, is_f64) = match layout.layout.backend_repr() {
        BackendRepr::Scalar(scalar) => match scalar.primitive() {
            rustc_abi::Primitive::Int(integer, _) => (integer.size().bits() == 64, true, false),
            rustc_abi::Primitive::Float(float) => (false, false, float.size().bits() == 64),
            rustc_abi::Primitive::Pointer(_) => (false, true, false),
        },
        _ => unreachable!("all instances of VaArgSafe are represented as scalars"),
    };

    let num_regs_addr = if is_int || is_soft_float_abi {
        va_list_addr // gpr
    } else {
        bx.inbounds_ptradd(va_list_addr, bx.const_usize(1)) // fpr
    };

    let mut num_regs = bx.load(bx.type_i8(), num_regs_addr, dl.i8_align);

    // "Align" the register count when the type is passed as `i64`.
    if is_i64 || (is_f64 && is_soft_float_abi) {
        num_regs = bx.add(num_regs, bx.const_u8(1));
        num_regs = bx.and(num_regs, bx.const_u8(0b1111_1110));
    }

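    // The 32-bit PowerPC SVR4 ABI passes up to 8 arguments in general-purpose registers
    // (r3-r10) and up to 8 in floating-point registers (f1-f8), hence the limit below.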
    let max_regs = 8u8;
    let use_regs = bx.icmp(IntPredicate::IntULT, num_regs, bx.const_u8(max_regs));
    let ptr_align_abi = bx.tcx().data_layout.pointer_align().abi;

    let in_reg = bx.append_sibling_block("va_arg.in_reg");
    let in_mem = bx.append_sibling_block("va_arg.in_mem");
    let end = bx.append_sibling_block("va_arg.end");

    bx.cond_br(use_regs, in_reg, in_mem);

    let reg_addr = {
        bx.switch_to_block(in_reg);

        let reg_safe_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(1 + 1 + 2 + 4));
        let mut reg_addr = bx.load(bx.type_ptr(), reg_safe_area_ptr, ptr_align_abi);

        // Floating-point registers start after the general-purpose registers.
        if !is_int && !is_soft_float_abi {
            reg_addr = bx.inbounds_ptradd(reg_addr, bx.cx.const_usize(32))
        }

        // Get the address of the saved value by scaling the number of
        // registers we've used by the size of each register.
        let reg_size = if is_int || is_soft_float_abi { 4 } else { 8 };
        let reg_offset = bx.mul(num_regs, bx.cx().const_u8(reg_size));
        let reg_addr = bx.inbounds_ptradd(reg_addr, reg_offset);

        // Increase the used-register count.
        let reg_incr = if is_i64 || (is_f64 && is_soft_float_abi) { 2 } else { 1 };
        let new_num_regs = bx.add(num_regs, bx.cx.const_u8(reg_incr));
        bx.store(new_num_regs, num_regs_addr, dl.i8_align);

        bx.br(end);

        reg_addr
    };

    let mem_addr = {
        bx.switch_to_block(in_mem);

        bx.store(bx.const_u8(max_regs), num_regs_addr, dl.i8_align);

        // Everything in the overflow area is rounded up to a size of at least 4.
        let overflow_area_align = Align::from_bytes(4).unwrap();

        let size = if !is_indirect {
            layout.layout.size.align_to(overflow_area_align)
        } else {
            dl.pointer_size()
        };

        let overflow_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(1 + 1 + 2));
        let mut overflow_area = bx.load(bx.type_ptr(), overflow_area_ptr, ptr_align_abi);

        // Round up address of argument to alignment
        if layout.layout.align.abi > overflow_area_align {
            overflow_area =
                round_pointer_up_to_alignment(bx, overflow_area, layout.layout.align.abi);
        }

        let mem_addr = overflow_area;

        // Increase the overflow area.
        overflow_area = bx.inbounds_ptradd(overflow_area, bx.const_usize(size.bytes()));
        bx.store(overflow_area, overflow_area_ptr, ptr_align_abi);

        bx.br(end);

        mem_addr
    };

    // Return the appropriate result.
    bx.switch_to_block(end);
    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
    let val_type = layout.llvm_type(bx);
    let val_addr =
        if is_indirect { bx.load(bx.cx.type_ptr(), val_addr, ptr_align_abi) } else { val_addr };
    bx.load(val_type, val_addr, layout.align.abi)
}

fn emit_s390x_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    // Implementation of the s390x ELF ABI calling convention for va_args, see
    // https://github.com/IBM/s390x-abi (chapter 1.2.4)
    //
    // typedef struct __va_list_tag {
    //     long __gpr;
    //     long __fpr;
    //     void *__overflow_arg_area;
    //     void *__reg_save_area;
    // } va_list[1];
    let va_list_addr = list.immediate();

    // There is no padding between fields since `long` and `void*` both have size=8 align=8.
    // https://github.com/IBM/s390x-abi (Table 1.1.: Scalar types)
    let i64_offset = 8;
    let ptr_offset = 8;
    let gpr = va_list_addr;
    let fpr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(i64_offset));
    let overflow_arg_area = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * i64_offset));
    let reg_save_area =
        bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * i64_offset + ptr_offset));

    let layout = bx.cx.layout_of(target_ty);

    let in_reg = bx.append_sibling_block("va_arg.in_reg");
    let in_mem = bx.append_sibling_block("va_arg.in_mem");
    let end = bx.append_sibling_block("va_arg.end");
    let ptr_align_abi = dl.pointer_align().abi;

    // FIXME: vector ABI not yet supported.
    let target_ty_size = bx.cx.size_of(target_ty).bytes();
    let indirect: bool = target_ty_size > 8 || !target_ty_size.is_power_of_two();
    let unpadded_size = if indirect { 8 } else { target_ty_size };
    let padded_size = 8;
    let padding = padded_size - unpadded_size;

    let gpr_type = indirect || !layout.is_single_fp_element(bx.cx);
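    // In the s390x ELF ABI's register save area, the argument GPRs (r2-r6) are saved starting
    // at byte offset 16 (index 2 below, in 8-byte slots) and the argument FPRs (f0, f2, f4, f6)
    // starting at byte offset 128 (index 16).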
    let (max_regs, reg_count, reg_save_index, reg_padding) =
        if gpr_type { (5, gpr, 2, padding) } else { (4, fpr, 16, 0) };

    // Check whether the value was passed in a register or in memory.
    let reg_count_v = bx.load(bx.type_i64(), reg_count, Align::from_bytes(8).unwrap());
    let use_regs = bx.icmp(IntPredicate::IntULT, reg_count_v, bx.const_u64(max_regs));
    bx.cond_br(use_regs, in_reg, in_mem);

    // Emit code to load the value if it was passed in a register.
    bx.switch_to_block(in_reg);

    // Work out the address of the value in the register save area.
    let reg_ptr_v = bx.load(bx.type_ptr(), reg_save_area, ptr_align_abi);
    let scaled_reg_count = bx.mul(reg_count_v, bx.const_u64(8));
    let reg_off = bx.add(scaled_reg_count, bx.const_u64(reg_save_index * 8 + reg_padding));
    let reg_addr = bx.ptradd(reg_ptr_v, reg_off);

    // Update the register count.
    let new_reg_count_v = bx.add(reg_count_v, bx.const_u64(1));
    bx.store(new_reg_count_v, reg_count, Align::from_bytes(8).unwrap());
    bx.br(end);

    // Emit code to load the value if it was passed in memory.
    bx.switch_to_block(in_mem);

    // Work out the address of the value in the argument overflow area.
    let arg_ptr_v = bx.load(bx.type_ptr(), overflow_arg_area, ptr_align_abi);
    let arg_off = bx.const_u64(padding);
    let mem_addr = bx.ptradd(arg_ptr_v, arg_off);

    // Update the argument overflow area pointer.
    let arg_size = bx.cx().const_u64(padded_size);
    let new_arg_ptr_v = bx.inbounds_ptradd(arg_ptr_v, arg_size);
    bx.store(new_arg_ptr_v, overflow_arg_area, ptr_align_abi);
    bx.br(end);

    // Return the appropriate result.
    bx.switch_to_block(end);
    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
    let val_type = layout.llvm_type(bx);
    let val_addr =
        if indirect { bx.load(bx.cx.type_ptr(), val_addr, ptr_align_abi) } else { val_addr };
    bx.load(val_type, val_addr, layout.align.abi)
}

fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    // Implementation of the systemv x86_64 ABI calling convention for va_args, see
    // https://gitlab.com/x86-psABIs/x86-64-ABI (section 3.5.7). This implementation is heavily
    // based on the one in clang.

    // We're able to take some shortcuts because the return type of `va_arg` must implement the
    // `VaArgSafe` trait. Currently, only pointers, f64, i32, u32, i64 and u64 implement this trait.

    // typedef struct __va_list_tag {
    //     unsigned int gp_offset;
    //     unsigned int fp_offset;
    //     void *overflow_arg_area;
    //     void *reg_save_area;
    // } va_list[1];
    let va_list_addr = list.immediate();

    // Peel off any newtype wrappers.
    //
    // The "C" ABI does not unwrap newtypes (see `ReprOptions::inhibit_newtype_abi_optimization`).
    // Here, we do actually want the unwrapped representation, because that is how LLVM/Clang
    // pass such types to variadic functions.
    //
    // An example of a type that must be unwrapped is `Foo` below. Without the unwrapping, it has
    // `BackendRepr::Memory`, but we need it to be `BackendRepr::Scalar` to generate correct code.
    //
    // ```
    // #[repr(C)]
    // struct Empty;
    //
    // #[repr(C)]
    // struct Foo([Empty; 8], i32);
    // ```
    let layout = {
        let mut layout = bx.cx.layout_of(target_ty);

        while let Some((_, inner)) = layout.non_1zst_field(bx.cx) {
            layout = inner;
        }

        layout
    };

    // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
    // in the registers. If not go to step 7.

    // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
    // general purpose registers needed to pass type and num_fp to hold
    // the number of floating point registers needed.

    let mut num_gp_registers = 0;
    let mut num_fp_registers = 0;

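    // Each general-purpose eightbyte occupies 8 bytes of the register save area, while each
    // SSE-class value occupies a 16-byte slot, hence the divisions by 8 and 16 below.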
    let mut registers_for_primitive = |p| match p {
        Primitive::Int(integer, _is_signed) => {
            num_gp_registers += integer.size().bytes().div_ceil(8) as u32;
        }
        Primitive::Float(float) => {
            num_fp_registers += float.size().bytes().div_ceil(16) as u32;
        }
        Primitive::Pointer(_) => {
            num_gp_registers += 1;
        }
    };

    match layout.layout.backend_repr() {
        BackendRepr::Scalar(scalar) => {
            registers_for_primitive(scalar.primitive());
        }
        BackendRepr::ScalarPair(scalar1, scalar2) => {
            registers_for_primitive(scalar1.primitive());
            registers_for_primitive(scalar2.primitive());
        }
        BackendRepr::SimdVector { .. } | BackendRepr::SimdScalableVector { .. } => {
            // Because no instance of VaArgSafe uses a non-scalar `BackendRepr`.
            unreachable!(
                "No x86-64 SysV va_arg implementation for {:?}",
                layout.layout.backend_repr()
            )
        }
        BackendRepr::Memory { .. } => {
            let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
            return bx.load(layout.llvm_type(bx), mem_addr, layout.align.abi);
        }
    };

    // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
    // registers. In the case: l->gp_offset > 48 - num_gp * 8 or
    // l->fp_offset > 176 - num_fp * 16 go to step 7.

    let unsigned_int_offset = 4;
    let ptr_offset = 8;
    let gp_offset_ptr = va_list_addr;
    let fp_offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(unsigned_int_offset));

    let gp_offset_v = bx.load(bx.type_i32(), gp_offset_ptr, Align::from_bytes(8).unwrap());
    let fp_offset_v = bx.load(bx.type_i32(), fp_offset_ptr, Align::from_bytes(4).unwrap());

    let mut use_regs = bx.const_bool(false);

    if num_gp_registers > 0 {
        let max_offset_val = 48u32 - num_gp_registers * 8;
        let fits_in_gp = bx.icmp(IntPredicate::IntULE, gp_offset_v, bx.const_u32(max_offset_val));
        use_regs = fits_in_gp;
    }

    if num_fp_registers > 0 {
        let max_offset_val = 176u32 - num_fp_registers * 16;
        let fits_in_fp = bx.icmp(IntPredicate::IntULE, fp_offset_v, bx.const_u32(max_offset_val));
        use_regs = if num_gp_registers > 0 { bx.and(use_regs, fits_in_fp) } else { fits_in_fp };
    }

    let in_reg = bx.append_sibling_block("va_arg.in_reg");
    let in_mem = bx.append_sibling_block("va_arg.in_mem");
    let end = bx.append_sibling_block("va_arg.end");

    bx.cond_br(use_regs, in_reg, in_mem);

    // Emit code to load the value if it was passed in a register.
    bx.switch_to_block(in_reg);

    // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with
    // an offset of l->gp_offset and/or l->fp_offset. This may require
    // copying to a temporary location in case the parameter is passed
    // in different register classes or requires an alignment greater
    // than 8 for general purpose registers and 16 for XMM registers.
    //
    // FIXME(llvm): This really results in shameful code when we end up needing to
    // collect arguments from different places; often what should result in a
    // simple assembling of a structure from scattered addresses has many more
    // loads than necessary. Can we clean this up?
    let reg_save_area_ptr =
        bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset + ptr_offset));
    let reg_save_area_v = bx.load(bx.type_ptr(), reg_save_area_ptr, dl.pointer_align().abi);

    let reg_addr = match layout.layout.backend_repr() {
        BackendRepr::Scalar(scalar) => match scalar.primitive() {
            Primitive::Int(_, _) | Primitive::Pointer(_) => {
                let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);

                // Copy into a temporary if the type is more aligned than the register save area.
                let gp_align = Align::from_bytes(8).unwrap();
                copy_to_temporary_if_more_aligned(bx, reg_addr, layout, gp_align)
            }
            Primitive::Float(_) => bx.inbounds_ptradd(reg_save_area_v, fp_offset_v),
        },
        BackendRepr::ScalarPair(scalar1, scalar2) => {
            let ty_lo = bx.cx().scalar_pair_element_backend_type(layout, 0, false);
            let ty_hi = bx.cx().scalar_pair_element_backend_type(layout, 1, false);

            let align_lo = layout.field(bx.cx, 0).layout.align().abi;
            let align_hi = layout.field(bx.cx, 1).layout.align().abi;

            match (scalar1.primitive(), scalar2.primitive()) {
                (Primitive::Float(_), Primitive::Float(_)) => {
                    // SSE registers are spaced 16 bytes apart in the register save
                    // area, so we need to collect the two eightbytes together.
                    // The ABI isn't explicit about this, but it seems reasonable
                    // to assume that the slots are 16-byte aligned, since the stack is
                    // naturally 16-byte aligned and the prologue is expected to store
                    // all the SSE registers to the RSA.
                    let reg_lo_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v);
                    let reg_hi_addr = bx.inbounds_ptradd(reg_lo_addr, bx.const_i32(16));

                    let align = layout.layout.align().abi;
                    let tmp = bx.alloca(layout.layout.size(), align);

                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);

                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
                    let field0 = tmp;
                    let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32));

                    bx.store(reg_lo, field0, align);
                    bx.store(reg_hi, field1, align);

                    tmp
                }
                (Primitive::Float(_), _) | (_, Primitive::Float(_)) => {
                    let gp_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);
                    let fp_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v);

                    let (reg_lo_addr, reg_hi_addr) = match scalar1.primitive() {
                        Primitive::Float(_) => (fp_addr, gp_addr),
                        Primitive::Int(_, _) | Primitive::Pointer(_) => (gp_addr, fp_addr),
                    };

                    let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);

                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);

                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
                    let field0 = tmp;
                    let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32));

                    bx.store(reg_lo, field0, align_lo);
                    bx.store(reg_hi, field1, align_hi);

                    tmp
                }
                (_, _) => {
                    // Two integer/pointer values are just contiguous in memory.
                    let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);

                    // Copy into a temporary if the type is more aligned than the register save area.
                    let gp_align = Align::from_bytes(8).unwrap();
                    copy_to_temporary_if_more_aligned(bx, reg_addr, layout, gp_align)
                }
            }
        }
        // The previous match on `BackendRepr` means control flow already escaped.
        BackendRepr::SimdVector { .. }
        | BackendRepr::SimdScalableVector { .. }
        | BackendRepr::Memory { .. } => unreachable!(),
    };

    // AMD64-ABI 3.5.7p5: Step 5. Set:
    // l->gp_offset = l->gp_offset + num_gp * 8
    if num_gp_registers > 0 {
        let offset = bx.const_u32(num_gp_registers * 8);
        let sum = bx.add(gp_offset_v, offset);
        // An alignment of 8 because `__va_list_tag` is 8-aligned and this is its first field.
        bx.store(sum, gp_offset_ptr, Align::from_bytes(8).unwrap());
    }

    // l->fp_offset = l->fp_offset + num_fp * 16.
    if num_fp_registers > 0 {
        let offset = bx.const_u32(num_fp_registers * 16);
        let sum = bx.add(fp_offset_v, offset);
        bx.store(sum, fp_offset_ptr, Align::from_bytes(4).unwrap());
    }

    bx.br(end);

    bx.switch_to_block(in_mem);
    let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
    bx.br(end);

    bx.switch_to_block(end);

    let val_type = layout.llvm_type(bx);
    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);

    bx.load(val_type, val_addr, layout.align.abi)
}

/// Copy into a temporary if the type is more aligned than the register save area.
fn copy_to_temporary_if_more_aligned<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    reg_addr: &'ll Value,
    layout: TyAndLayout<'tcx>,
    src_align: Align,
) -> &'ll Value {
    if layout.layout.align.abi > src_align {
        let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
        bx.memcpy(
            tmp,
            layout.layout.align.abi,
            reg_addr,
            src_align,
            bx.const_u32(layout.layout.size().bytes() as u32),
            MemFlags::empty(),
            None,
        );
        tmp
    } else {
        reg_addr
    }
}

fn x86_64_sysv64_va_arg_from_memory<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    va_list_addr: &'ll Value,
    layout: TyAndLayout<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();
    let ptr_align_abi = dl.data_layout().pointer_align().abi;

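    // The overflow area pointer lives at byte offset 8 of `__va_list_tag`, directly after the
    // two `unsigned int` fields `gp_offset` and `fp_offset`.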
    let overflow_arg_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.const_usize(8));

    let overflow_arg_area_v = bx.load(bx.type_ptr(), overflow_arg_area_ptr, ptr_align_abi);
    // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
    // byte boundary if alignment needed by type exceeds 8 byte boundary.
    // It isn't stated explicitly in the standard, but in practice we use
    // alignment greater than 16 where necessary.
    if layout.layout.align.bytes() > 8 {
        unreachable!("all instances of VaArgSafe have an alignment <= 8");
    }

    // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
    let mem_addr = overflow_arg_area_v;

    // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to:
    // l->overflow_arg_area + sizeof(type).
    // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to
    // an 8 byte boundary.
    let size_in_bytes = layout.layout.size().bytes();
    let offset = bx.const_i32(size_in_bytes.next_multiple_of(8) as i32);
    let overflow_arg_area = bx.inbounds_ptradd(overflow_arg_area_v, offset);
    bx.store(overflow_arg_area, overflow_arg_area_ptr, ptr_align_abi);

    mem_addr
}

fn emit_hexagon_va_arg_musl<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    // Implementation of va_arg for the Hexagon musl target.
    // Based on LLVM's HexagonBuiltinVaList implementation.
    //
    // struct __va_list_tag {
    //   void *__current_saved_reg_area_pointer;
    //   void *__saved_reg_area_end_pointer;
    //   void *__overflow_area_pointer;
    // };
    //
    // All variadic arguments are passed on the stack, but the musl implementation
    // uses a register save area for compatibility.
    let va_list_addr = list.immediate();
    let layout = bx.cx.layout_of(target_ty);
    let ptr_align_abi = bx.tcx().data_layout.pointer_align().abi;
    let ptr_size = bx.tcx().data_layout.pointer_size().bytes();

    // Basic blocks for the register save area and overflow (stack) paths.
    let maybe_reg = bx.append_sibling_block("va_arg.maybe_reg");
    let from_overflow = bx.append_sibling_block("va_arg.from_overflow");
    let end = bx.append_sibling_block("va_arg.end");

    // Load the three pointers from the va_list.
    let current_ptr_addr = va_list_addr;
    let end_ptr_addr = bx.inbounds_ptradd(va_list_addr, bx.const_usize(ptr_size));
    let overflow_ptr_addr = bx.inbounds_ptradd(va_list_addr, bx.const_usize(2 * ptr_size));

    let current_ptr = bx.load(bx.type_ptr(), current_ptr_addr, ptr_align_abi);
    let end_ptr = bx.load(bx.type_ptr(), end_ptr_addr, ptr_align_abi);
    let overflow_ptr = bx.load(bx.type_ptr(), overflow_ptr_addr, ptr_align_abi);

    // Align the current pointer based on the argument type's size (following LLVM's
    // implementation): arguments <= 32 bits (4 bytes) use 4-byte alignment, > 32 bits
    // use 8-byte alignment.
    let type_size_bits = bx.cx.size_of(target_ty).bits();
    let arg_align = if type_size_bits > 32 {
        Align::from_bytes(8).unwrap()
    } else {
        Align::from_bytes(4).unwrap()
    };
    let aligned_current = round_pointer_up_to_alignment(bx, current_ptr, arg_align);

    // Calculate the next pointer position (following LLVM's logic):
    // arguments <= 32 bits take 4 bytes, > 32 bits take 8 bytes.
    let arg_size = if type_size_bits > 32 { 8 } else { 4 };
    let next_ptr = bx.inbounds_ptradd(aligned_current, bx.const_usize(arg_size));

    // Check if the argument fits in the register save area.
    let fits_in_regs = bx.icmp(IntPredicate::IntULE, next_ptr, end_ptr);
    bx.cond_br(fits_in_regs, maybe_reg, from_overflow);

    // Load from the register save area.
    bx.switch_to_block(maybe_reg);
    let reg_value_addr = aligned_current;
    // Update the current pointer.
    bx.store(next_ptr, current_ptr_addr, ptr_align_abi);
    bx.br(end);

    // Load from the overflow area (stack).
    bx.switch_to_block(from_overflow);

    // Align the overflow pointer using the same alignment rules.
    let aligned_overflow = round_pointer_up_to_alignment(bx, overflow_ptr, arg_align);

    let overflow_value_addr = aligned_overflow;
    // Update the overflow pointer, using the same size calculation.
    let next_overflow = bx.inbounds_ptradd(aligned_overflow, bx.const_usize(arg_size));
    bx.store(next_overflow, overflow_ptr_addr, ptr_align_abi);

    // IMPORTANT: also update the current saved register area pointer to match.
    // This synchronizes the pointers when switching to the overflow area.
    bx.store(next_overflow, current_ptr_addr, ptr_align_abi);
    bx.br(end);

    // Return the value.
    bx.switch_to_block(end);
    let value_addr =
        bx.phi(bx.type_ptr(), &[reg_value_addr, overflow_value_addr], &[maybe_reg, from_overflow]);
    bx.load(layout.llvm_type(bx), value_addr, layout.align.abi)
}

fn emit_hexagon_va_arg_bare_metal<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    // Implementation of va_arg for Hexagon bare-metal (non-musl) targets.
    // Based on LLVM's EmitVAArgForHexagon implementation.
    //
    // va_list is a simple pointer (char *)
    let va_list_addr = list.immediate();
    let layout = bx.cx.layout_of(target_ty);
    let ptr_align_abi = bx.tcx().data_layout.pointer_align().abi;

    // Load current pointer from va_list
    let current_ptr = bx.load(bx.type_ptr(), va_list_addr, ptr_align_abi);

    // Handle address alignment for types with alignment > 4 bytes
    let ty_align = layout.align.abi;
    let aligned_ptr = if ty_align.bytes() > 4 {
        // Ensure alignment is a power of 2
        debug_assert!(ty_align.bytes().is_power_of_two(), "Alignment is not power of 2!");
        round_pointer_up_to_alignment(bx, current_ptr, ty_align)
    } else {
        current_ptr
    };

    // Calculate offset: round up type size to 4-byte boundary (minimum stack slot size)
    let type_size = layout.size.bytes();
    let offset = type_size.next_multiple_of(4); // align to 4 bytes

    // Update va_list to point to next argument
    let next_ptr = bx.inbounds_ptradd(aligned_ptr, bx.const_usize(offset));
    bx.store(next_ptr, va_list_addr, ptr_align_abi);

    // Load and return the argument value
    bx.load(layout.llvm_type(bx), aligned_ptr, layout.align.abi)
}

fn emit_xtensa_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    // Implementation of va_arg for Xtensa. There doesn't seem to be an authoritative source for
    // this, other than "what GCC does".
    //
    // The va_list type has three fields:
    // struct __va_list_tag {
    //   int32_t *va_stk; // Arguments passed on the stack
    //   int32_t *va_reg; // Arguments passed in registers, saved to memory by the prologue.
    //   int32_t va_ndx; // Offset into the arguments, in bytes
    // };
    //
    // The first 24 bytes (equivalent to 6 registers) come from va_reg, the rest from va_stk.
    // Thus if va_ndx is less than 24, the next va_arg *may* read from va_reg,
    // otherwise it must come from va_stk.
    //
    // Primitive arguments are never split between registers and the stack. For example, if loading
    // an 8 byte primitive value and va_ndx = 20, we instead bump the offset and read everything
    // from va_stk.
    let va_list_addr = list.immediate();
    // FIXME: handle multi-field structs that split across regsave/stack?
    let layout = bx.cx.layout_of(target_ty);
    let from_stack = bx.append_sibling_block("va_arg.from_stack");
    let from_regsave = bx.append_sibling_block("va_arg.from_regsave");
    let end = bx.append_sibling_block("va_arg.end");
    let ptr_align_abi = bx.tcx().data_layout.pointer_align().abi;

    // (*va).va_ndx
    let va_reg_offset = 4;
    let va_ndx_offset = va_reg_offset + 4;
    let offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(va_ndx_offset));

    let offset = bx.load(bx.type_i32(), offset_ptr, bx.tcx().data_layout.i32_align);
    let offset = round_up_to_alignment(bx, offset, layout.align.abi);

    let slot_size = layout.size.align_to(Align::from_bytes(4).unwrap()).bytes() as i32;

    // Update the offset in va_list, by adding the slot's size.
    let offset_next = bx.add(offset, bx.const_i32(slot_size));

    // Figure out where to look for our value. We do that by checking the end of our slot
    // (offset_next). If that is within the regsave area, then load from there. Otherwise load
    // from the stack area.
    let regsave_size = bx.const_i32(24);
    let use_regsave = bx.icmp(IntPredicate::IntULE, offset_next, regsave_size);
    bx.cond_br(use_regsave, from_regsave, from_stack);

    bx.switch_to_block(from_regsave);
    // update va_ndx
    bx.store(offset_next, offset_ptr, ptr_align_abi);

    // (*va).va_reg
    let regsave_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(va_reg_offset));
    let regsave_area = bx.load(bx.type_ptr(), regsave_area_ptr, ptr_align_abi);
    let regsave_value_ptr = bx.inbounds_ptradd(regsave_area, offset);
    bx.br(end);

    bx.switch_to_block(from_stack);

    // The first time we switch from regsave to stack we need to adjust our offsets a bit.
    // va_stk is set up such that the first stack argument is always at va_stk + 32.
    // The corrected offset is written back into the va_list struct.

    // let offset_corrected = cmp::max(offset, 32);
    let stack_offset_start = bx.const_i32(32);
    let needs_correction = bx.icmp(IntPredicate::IntULE, offset, stack_offset_start);
    let offset_corrected = bx.select(needs_correction, stack_offset_start, offset);

    // let offset_next_corrected = offset_corrected + slot_size;
    // va_ndx = offset_next_corrected;
    let offset_next_corrected = bx.add(offset_corrected, bx.const_i32(slot_size));
    // update va_ndx
    bx.store(offset_next_corrected, offset_ptr, ptr_align_abi);

    // let stack_value_ptr = unsafe { (*va).va_stk.byte_add(offset_corrected) };
    let stack_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(0));
    let stack_area = bx.load(bx.type_ptr(), stack_area_ptr, ptr_align_abi);
    let stack_value_ptr = bx.inbounds_ptradd(stack_area, offset_corrected);
    bx.br(end);

    bx.switch_to_block(end);

    // On big-endian, for values smaller than the slot size we'd have to align the read to the end
    // of the slot rather than the start. While the ISA and GCC support big-endian, all the Xtensa
    // targets supported by rustc are little-endian so don't worry about it.

    // if from_regsave {
    //     unsafe { *regsave_value_ptr }
    // } else {
    //     unsafe { *stack_value_ptr }
    // }
    assert!(bx.tcx().sess.target.endian == Endian::Little);
    let value_ptr =
        bx.phi(bx.type_ptr(), &[regsave_value_ptr, stack_value_ptr], &[from_regsave, from_stack]);
    return bx.load(layout.llvm_type(bx), value_ptr, layout.align.abi);
}

/// Determine the va_arg implementation to use. The LLVM va_arg instruction
/// is lacking in some instances, so we should only use it as a fallback.
///
/// <https://llvm.org/docs/LangRef.html#va-arg-instruction>
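///
/// For illustration, a hypothetical variadic callee whose `ap.arg::<u32>()` calls are lowered
/// through this function (the example function is ours, not from this crate):
///
/// ```ignore (illustrative)
/// #![feature(c_variadic)]
/// unsafe extern "C" fn sum(first: u32, mut ap: ...) -> u32 {
///     first + ap.arg::<u32>() + ap.arg::<u32>()
/// }
/// ```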
pub(super) fn emit_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    addr: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let layout = bx.cx.layout_of(target_ty);
    let target_ty_size = layout.layout.size().bytes();

    // Some ABIs have special behavior for zero-sized types. Currently `VaArgSafe` is not
    // implemented for any zero-sized types, so this assert should always hold.
    assert!(!bx.layout_of(target_ty).is_zst());

    let target = &bx.cx.tcx.sess.target;
    let stability = target.supports_c_variadic_definitions();

    match target.arch {
        Arch::X86 => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            PassMode::Direct,
            SlotSize::Bytes4,
            if target.is_like_windows { AllowHigherAlign::No } else { AllowHigherAlign::Yes },
            ForceRightAdjust::No,
        ),
        Arch::Arm64EC => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            PassMode::Direct,
            SlotSize::Bytes8,
            if target.is_like_windows { AllowHigherAlign::No } else { AllowHigherAlign::Yes },
            ForceRightAdjust::No,
        ),
        Arch::AArch64 if target.is_like_windows || target.is_like_darwin => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            PassMode::Direct,
            SlotSize::Bytes8,
            if target.is_like_windows { AllowHigherAlign::No } else { AllowHigherAlign::Yes },
            ForceRightAdjust::No,
        ),
        Arch::AArch64 => emit_aapcs_va_arg(bx, addr, target_ty),
        Arch::Arm => {
            // Types wider than 16 bytes are not currently supported. Clang has special logic for
            // such types, but `VaArgSafe` is not implemented for any type that is this large.
            assert!(bx.cx.size_of(target_ty).bytes() <= 16);

            emit_ptr_va_arg(
                bx,
                addr,
                target_ty,
                PassMode::Direct,
                SlotSize::Bytes4,
                AllowHigherAlign::Yes,
                ForceRightAdjust::No,
            )
        }
        Arch::S390x => emit_s390x_va_arg(bx, addr, target_ty),
        Arch::PowerPC => emit_powerpc_va_arg(bx, addr, target_ty),
        Arch::PowerPC64 => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            PassMode::Direct,
            SlotSize::Bytes8,
            AllowHigherAlign::Yes,
            ForceRightAdjust::Yes,
        ),
        Arch::RiscV32 if target.llvm_abiname == LlvmAbi::Ilp32e => {
            assert_matches!(stability, CVariadicStatus::Unstable { .. });
            // FIXME: clang manually adjusts the alignment for this ABI. It notes:
            //
            // > To be compatible with GCC's behaviors, we force arguments with
            // > 2×XLEN-bit alignment and size at most 2×XLEN bits like `long long`,
            // > `unsigned long long` and `double` to have 4-byte alignment. This
            // > behavior may be changed when RV32E/ILP32E is ratified.
            bug!("c-variadic calls with ilp32e use a custom ABI and are not currently implemented");
        }
        Arch::RiscV32 | Arch::LoongArch32 => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            if target_ty_size > 2 * 4 { PassMode::Indirect } else { PassMode::Direct },
            SlotSize::Bytes4,
            AllowHigherAlign::Yes,
            ForceRightAdjust::No,
        ),
        Arch::RiscV64 | Arch::LoongArch64 => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            if target_ty_size > 2 * 8 { PassMode::Indirect } else { PassMode::Direct },
            SlotSize::Bytes8,
            AllowHigherAlign::Yes,
            ForceRightAdjust::No,
        ),
        Arch::AmdGpu => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            PassMode::Direct,
            SlotSize::Bytes4,
            AllowHigherAlign::No,
            ForceRightAdjust::No,
        ),
        Arch::Nvptx64 => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            PassMode::Direct,
            SlotSize::Bytes1,
            AllowHigherAlign::Yes,
            ForceRightAdjust::No,
        ),
        Arch::Wasm32 | Arch::Wasm64 => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            if layout.is_aggregate() || layout.is_zst() || layout.is_1zst() {
                PassMode::Indirect
            } else {
                PassMode::Direct
            },
            SlotSize::Bytes4,
            AllowHigherAlign::Yes,
            ForceRightAdjust::No,
        ),
        Arch::CSky => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            PassMode::Direct,
            SlotSize::Bytes4,
            AllowHigherAlign::Yes,
            ForceRightAdjust::No,
        ),
        // Windows x86_64
        Arch::X86_64 if target.is_like_windows => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            if target_ty_size > 8 || !target_ty_size.is_power_of_two() {
                PassMode::Indirect
            } else {
                PassMode::Direct
            },
            SlotSize::Bytes8,
            AllowHigherAlign::No,
            ForceRightAdjust::No,
        ),
        // This includes `target.is_like_darwin`, which on x86_64 targets is like sysv64.
        Arch::X86_64 => emit_x86_64_sysv64_va_arg(bx, addr, target_ty),
        Arch::Xtensa => emit_xtensa_va_arg(bx, addr, target_ty),
        Arch::Hexagon => match target.env {
            Env::Musl => emit_hexagon_va_arg_musl(bx, addr, target_ty),
            _ => emit_hexagon_va_arg_bare_metal(bx, addr, target_ty),
        },
        Arch::Sparc64 => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            if target_ty_size > 2 * 8 { PassMode::Indirect } else { PassMode::Direct },
            SlotSize::Bytes8,
            AllowHigherAlign::Yes,
            // sparc64 is a big-endian target and stores variable arguments right-adjusted.
            ForceRightAdjust::Yes,
        ),
        Arch::Mips | Arch::Mips32r6 | Arch::Mips64 | Arch::Mips64r6 => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            PassMode::Direct,
            match &target.llvm_abiname {
                LlvmAbi::N32 | LlvmAbi::N64 => SlotSize::Bytes8,
                LlvmAbi::O32 => SlotSize::Bytes4,
                other => bug!("unexpected LLVM ABI {other}"),
            },
            AllowHigherAlign::Yes,
            // In big-endian mode the actual value is stored in the right side of the slot, meaning
            // that when the value is smaller than a slot, we need to adjust the pointer we read
            // to somewhere in the middle of the slot.
            match bx.tcx().sess.target.endian {
                Endian::Big => ForceRightAdjust::Yes,
                Endian::Little => ForceRightAdjust::No,
            },
        ),

        Arch::Bpf => bug!("bpf does not support c-variadic functions"),
        Arch::SpirV => bug!("spirv does not support c-variadic functions"),

        Arch::Sparc | Arch::Avr | Arch::M68k | Arch::Msp430 => {
            assert_matches!(stability, CVariadicStatus::Unstable { .. });

            // Clang uses the LLVM implementation for these architectures.
            bx.va_arg(addr.immediate(), bx.cx.layout_of(target_ty).llvm_type(bx.cx))
        }

        Arch::Other(ref arch) => {
            assert_matches!(stability, CVariadicStatus::Unstable { .. });

            // Just to be safe we error out explicitly here, instead of crossing our fingers that
            // the default LLVM implementation has the correct behavior for this target.
            bug!("c-variadic functions are not currently implemented for custom target {arch}")
        }
    }
}