julia> @finch_code begin
        # Serial pass: reset x, then store (j * j) % 5 + 1 at every position j.
        # (j * j) % 5 is always 0, 1 or 4, so only the values 1, 2 and 5 are written.
        x .= 0
        for j = _
            x[j] = Int((j * j) % 5 + 1)
        end
        # Parallel pass: reset y, then use x[j] as an index into y and bump that
        # slot. Different j can hit the same slot of y, so the updates collide
        # across iterations of the parallel loop.
        y .= 0
        for j = parallel(_)
            y[x[j]] += 1
        end
    end
quote
    # Generated kernel: fills x serially, then builds a histogram of x into y
    # with a threaded loop that guards every y update with a per-slot lock.

    # Unpack x: outer level (carries the shape) -> inner level -> value buffer.
    x_lvl = ((ex.bodies[1]).bodies[1]).tns.bind.lvl
    x_lvl_stop = x_lvl.shape
    x_lvl_2 = x_lvl.lvl
    x_lvl_2_val = x_lvl_2.val
    # Unpack y: outer level -> lock-carrying level -> innermost level holding
    # the value buffer.
    y_lvl = ((ex.bodies[1]).bodies[3]).tns.bind.lvl
    y_lvl_stop = y_lvl.shape
    y_lvl_2 = y_lvl.lvl
    y_lvl_2_locks = y_lvl_2.locks
    y_lvl_3 = y_lvl_2.lvl
    y_lvl_3_val = y_lvl_3.val
    # Number of Julia threads; also the number of work chunks used below.
    n = (Base.Threads).nthreads()
    # `x .= 0`: make sure the buffer has room for x_lvl_stop entries and reset
    # them (presumably fill_range! writes 0 into positions 1:x_lvl_stop —
    # TODO confirm against Finch).
    Finch.resize_if_smaller!(x_lvl_2_val, x_lvl_stop)
    Finch.fill_range!(x_lvl_2_val, 0, 1, x_lvl_stop)
    # Serial loop: x[j] = 1 + rem(j * j, 5), stored as Int32.
    for j_7 = 1:x_lvl_stop
        # (1 - 1) * x_lvl_stop is 0, so the position is just j_7.
        x_lvl_q = (1 - 1) * x_lvl_stop + j_7
        x_lvl_2_val[x_lvl_q] = (Int32)(1 + rem(j_7 * j_7, 5))
    end
    # `y .= 0`: one freshly made lock per slot of y, then reset the values.
    Finch.resize_if_smaller!(y_lvl_2_locks, y_lvl_stop)
    @inbounds for idx = 1:y_lvl_stop
            y_lvl_2_locks[idx] = Finch.make_lock(eltype(Vector{Base.Threads.SpinLock}))
        end
    Finch.resize_if_smaller!(y_lvl_3_val, y_lvl_stop)
    Finch.fill_range!(y_lvl_3_val, 0, 1, y_lvl_stop)
    # Set x's buffer to exactly x_lvl_stop entries before it is read.
    resize!(x_lvl_2_val, x_lvl_stop)
    # Hand the three buffers to the CPU shared-memory device; the `_2` names
    # are what the threaded region and the final result use.
    # NOTE(review): whether transfer copies or aliases is not visible here.
    x_lvl_2_val_2 = (Finch).transfer(Finch.CPUSharedMemory(Finch.CPU(n)), x_lvl_2_val)
    y_lvl_3_val_2 = (Finch).transfer(Finch.CPUSharedMemory(Finch.CPU(n)), y_lvl_3_val)
    y_lvl_2_locks_2 = (Finch).transfer(Finch.CPUSharedMemory(Finch.CPU(n)), y_lvl_2_locks)
    # One iteration per chunk id; each tid handles one contiguous slice of j.
    Threads.@threads :dynamic for tid = 1:n
            Finch.@barrier begin
                    @inbounds @fastmath(begin
                                # Per-chunk handles (`_3`) derived from the shared buffers.
                                x_lvl_2_val_3 = (Finch).transfer(Finch.CPUThread(tid, Finch.CPU(n), Finch.Serial()), x_lvl_2_val_2)
                                y_lvl_3_val_3 = (Finch).transfer(Finch.CPUThread(tid, Finch.CPU(n), Finch.Serial()), y_lvl_3_val_2)
                                y_lvl_2_locks_3 = (Finch).transfer(Finch.CPUThread(tid, Finch.CPU(n), Finch.Serial()), y_lvl_2_locks_2)
                                # Slice of 1:x_lvl_stop owned by this tid:
                                # 1 + fld(stop * (tid - 1), n) through fld(stop * tid, n),
                                # clamped to the valid range.
                                phase_start_2 = max(1, 1 + fld(x_lvl_stop * (-1 + tid), n))
                                phase_stop_2 = min(x_lvl_stop, fld(x_lvl_stop * tid, n))
                                # The slice can be empty, e.g. when n exceeds x_lvl_stop.
                                if phase_stop_2 >= phase_start_2
                                    for j_10 = phase_start_2:phase_stop_2
                                        x_lvl_q_2 = (1 - 1) * x_lvl_stop + j_10
                                        # The value read from x becomes the position in y.
                                        # NOTE(review): no range check against y_lvl_stop is
                                        # visible and @inbounds is active — confirm that x
                                        # never exceeds the size of y.
                                        x_lvl_2_val_4 = x_lvl_2_val_3[x_lvl_q_2]
                                        y_lvl_q = (1 - 1) * y_lvl_stop + x_lvl_2_val_4
                                        # Take the lock of that y slot, increment the
                                        # count, then release the lock.
                                        y_lvl_2atomicArraysAcc = Finch.get_lock(Finch.CPU(n), y_lvl_2_locks_3, y_lvl_q, eltype(Vector{Base.Threads.SpinLock}))
                                        Finch.aquire_lock!(Finch.CPU(n), y_lvl_2atomicArraysAcc)
                                        y_lvl_3_val_3[y_lvl_q] = 1 + y_lvl_3_val_3[y_lvl_q]
                                        Finch.release_lock!(Finch.CPU(n), y_lvl_2atomicArraysAcc)
                                    end
                                end
                                # Trailing phase after this tid's slice: the branch body is
                                # a bare expression whose value is discarded, so it does
                                # nothing (looks like residue of an empty phase — TODO
                                # confirm).
                                phase_start_3 = max(1, 1 + fld(x_lvl_stop * tid, n))
                                if x_lvl_stop >= phase_start_3
                                    x_lvl_stop + 1
                                end
                            end)
                    nothing
                end
        end
    # Set y's lock and value buffers to exactly y_lvl_stop entries.
    resize!(y_lvl_2_locks_2, y_lvl_stop)
    resize!(y_lvl_3_val_2, y_lvl_stop)
    # Result: x and y rebuilt as tensors around the shared (`_2`) buffers.
    (x = Tensor((DenseLevel){Int32}(ElementLevel{0, Int32, Int32}(x_lvl_2_val_2), x_lvl_stop)), y = Tensor((DenseLevel){Int32}((MutexLevel){Vector{Base.Threads.SpinLock}, ElementLevel{0, Int32, Int32, Vector{Int32}}}(ElementLevel{0, Int32, Int32}(y_lvl_3_val_2), y_lvl_2_locks_2), y_lvl_stop)))
end
julia> @finch begin
        # Same program as above, executed this time instead of printed.
        # Serial pass: x[j] = (j * j) % 5 + 1, which only yields 1, 2 or 5.
        x .= 0
        for j = _
            x[j] = Int((j * j) % 5 + 1)
        end
        # Parallel pass: y[v] counts how many positions of x hold the value v.
        y .= 0
        for j = parallel(_)
            y[x[j]] += 1
        end
    end
(x = Tensor(Dense{Int32}(Element{0, Int32, Int32}([2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1]), 100)), y = Tensor(Dense{Int32}(Mutex(Element{0, Int32, Int32}([20, 40, 0, 0, 40]), [Base.Threads.SpinLock(0), Base.Threads.SpinLock(0), Base.Threads.SpinLock(0), Base.Threads.SpinLock(0), Base.Threads.SpinLock(0)]), 5)))
julia> @finch begin
        # Serial counterpart of the parallel run, writing to xp / yp instead.
        xp .= 0
        for j = _
            xp[j] = Int((j * j) % 5 + 1)
        end
        # Both loops here are plain (no `parallel`).
        # NOTE(review): the index is read from x, not from the xp filled just
        # above. x and xp hold the same values in this transcript, so the
        # counts agree — confirm whether xp[j] was intended.
        yp .= 0
        for j = _
            yp[x[j]] += 1
        end
    end
(xp = Tensor(Dense{Int32}(Element{0, Int32, Int32}([2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1, 2, 5, 5, 2, 1]), 100)), yp = Tensor(Dense{Int32}(Element{0.0, Float64, Int32}([20.0, 40.0, 0.0, 0.0, 40.0]), 5)))

