julia> @finch_code begin
        # Input program handed to @finch_code; the `quote ... end` printed after it
        # is the code shown for this program.
        # Step 1: zero x, then store (j * j) % 5 + 1 into x[j] for every j.
        # For positive j that value always lies in 1:5.
        x .= 0
        for j = _
            x[j] = Int((j * j) % 5 + 1)
        end
        # Step 2: zero y, then add 1 to the entry of y selected by x[j].
        # The loop extent is wrapped in parallel(...); several j can select the
        # same entry of y, so the increments target shared locations.
        y .= 0
        for j = parallel(_)
            y[x[j]] += 1
        end
    end
quote
    # Unpack the level objects and backing arrays of x (ex.bodies[1]) and
    # y (ex.bodies[3]). `ex` comes from outside this snippet.
    x_lvl = (ex.bodies[1]).tns.bind.lvl
    x_lvl_2 = x_lvl.lvl
    x_lvl_val = x_lvl.lvl.val
    y_lvl = (ex.bodies[3]).tns.bind.lvl
    y_lvl_2 = y_lvl.lvl
    y_lvl_locks = y_lvl.lvl.locks
    y_lvl_3 = y_lvl_2.lvl
    y_lvl_2_val = y_lvl_2.lvl.val
    # Prepare x's value array for x_lvl.shape entries with initial value 0.
    # (Presumably fill_range! writes positions 1 through x_lvl.shape; the helper
    # is defined outside this snippet -- confirm.)
    Finch.resize_if_smaller!(x_lvl_val, x_lvl.shape)
    Finch.fill_range!(x_lvl_val, 0, 1, x_lvl.shape)
    # Serial loop: x[j] = 1 + rem(j * j, 5), stored as Int32.
    for j_7 = 1:x_lvl.shape
        # (1 - 1) * shape is 0, so the position is simply j_7.
        x_lvl_q = (1 - 1) * x_lvl.shape + j_7
        x_lvl_val[x_lvl_q] = (Int32)(1 + rem(j_7 * j_7, 5))
    end
    # Store the result of make_lock(...) in each of the first y_lvl.shape lock slots.
    Finch.resize_if_smaller!(y_lvl_locks, y_lvl.shape)
    @inbounds for idx = 1:y_lvl.shape
            y_lvl_locks[idx] = make_lock(eltype(Vector{Base.Threads.SpinLock}))
        end
    # Prepare y's value array for y_lvl.shape entries with initial value 0.0.
    Finch.resize_if_smaller!(y_lvl_2_val, y_lvl.shape)
    Finch.fill_range!(y_lvl_2_val, 0.0, 1, y_lvl.shape)
    # Keep handles to the original arrays (val, locksArray, val_2) and rebind the
    # working names to whatever moveto(..., CPU(nthreads)) returns. The original
    # handles are the ones resized at the end, and locksArray goes into the result.
    val = x_lvl_val
    x_lvl_val = moveto(x_lvl_val, CPU(Threads.nthreads()))
    locksArray = y_lvl_locks
    y_lvl_locks = moveto(y_lvl_locks, CPU(Threads.nthreads()))
    val_2 = y_lvl_2_val
    y_lvl_2_val = moveto(y_lvl_2_val, CPU(Threads.nthreads()))
    # One @threads iteration per thread count; iteration i handles a contiguous
    # slice of 1:x_lvl.shape.
    Threads.@threads for i = 1:Threads.nthreads()
            # Slice bounds: start = 1 + fld(shape * (i - 1), n), stop = fld(shape * i, n),
            # clamped to 1:shape. Consecutive i give adjacent, non-overlapping slices.
            phase_start_2 = max(1, 1 + fld(x_lvl.shape * (-1 + i), Threads.nthreads()))
            phase_stop_2 = min(x_lvl.shape, fld(x_lvl.shape * i, Threads.nthreads()))
            # The slice is empty when there are more iterations than elements.
            if phase_stop_2 >= phase_start_2
                for j_10 = phase_start_2:phase_stop_2
                    # Read x[j]; its value is used as the position in y.
                    x_lvl_q_2 = (1 - 1) * x_lvl.shape + j_10
                    x_lvl_2_val = x_lvl_val[x_lvl_q_2]
                    y_lvl_q = (1 - 1) * y_lvl.shape + x_lvl_2_val
                    # Fetch the lock for that position and hold it around the
                    # read-modify-write of y. `aquire_lock!` is spelled as the
                    # external helper is called here.
                    y_lvl_2atomicArraysAcc = get_lock(CPU(Threads.nthreads()), y_lvl_locks, y_lvl_q, eltype(Vector{Base.Threads.SpinLock}))
                    aquire_lock!(CPU(Threads.nthreads()), y_lvl_2atomicArraysAcc)
                    y_lvl_2_val[y_lvl_q] = 1 + y_lvl_2_val[y_lvl_q]
                    release_lock!(CPU(Threads.nthreads()), y_lvl_2atomicArraysAcc)
                end
            end
        end
    # Resize the original arrays to their final lengths. val is x_lvl_2.val and
    # val_2 is y_lvl_3.val (see the unpacking at the top), i.e. the storage of the
    # levels returned below.
    qos = 1 * x_lvl.shape
    resize!(val, qos)
    qos_2 = 1 * y_lvl.shape
    resize!(locksArray, qos_2)
    resize!(val_2, qos_2)
    # Result: a named tuple with x as a dense tensor over x_lvl_2 and y as a dense
    # tensor over an AtomicLevel built from y_lvl_3 and locksArray.
    (x = Tensor((DenseLevel){Int32}(x_lvl_2, x_lvl.shape)), y = Tensor((DenseLevel){Int32}((AtomicLevel){Vector{Base.Threads.SpinLock}, ElementLevel{0.0, Float64, Int32, Vector{Float64}}}(y_lvl_3, locksArray), y_lvl.shape)))
end
julia> @finch begin
        # Runs the program with @finch; the line printed after this block is the
        # resulting (x, y) named tuple.
        # x[j] = (j * j) % 5 + 1 for every j.
        x .= 0
        for j = _
            x[j] = Int((j * j) % 5 + 1)
        end
        # Count into y using x[j] as the index; this loop is marked parallel(...).
        y .= 0
        for j = parallel(_)
            y[x[j]] += 1
        end
    end
(x = Tensor(Dense{Int32}(Element{0, Int32, Int32}([2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1]), 100)), y = Tensor(Dense{Int32}(Atomic([Base.Threads.SpinLock(0), Base.Threads.SpinLock(0), Base.Threads.SpinLock(0), Base.Threads.SpinLock(0), Base.Threads.SpinLock(0)], Element{0.0, Float64, Int32}([20.0, 40.0, 0.0, 0.0, 40.0])), 5)))
julia> @finch begin
        # Same computation written against xp / yp, with no parallel(...) on the
        # second loop; the line printed after this block is the (xp, yp) result.
        xp .= 0
        for j = _
            xp[j] = Int((j * j) % 5 + 1)
        end
        yp .= 0
        for j = _
            # NOTE(review): this indexes with x[j], not xp[j]. x is not assigned in
            # this block -- presumably it is the tensor left over from the earlier
            # run, which prints the same values as xp. Confirm this is intended.
            yp[x[j]] += 1
        end
    end
(xp = Tensor(Dense{Int32}(Element{0, Int32, Int32}([2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1]), 100)), yp = Tensor(Dense{Int32}(Element{0.0, Float64, Int32}([20.0, 40.0, 0.0, 0.0, 40.0]), 5)))

