Hi,
I noticed that the gradient calculation on the GPU does not seem to be working as expected.
Am I making a mistake somewhere?
The test code is as follows:
"""
# Benchmark script: time the expectation value of a Heisenberg Hamiltonian on a
# variational circuit, then time and profile its gradient, using a CuYao register.
using Yao, YaoExtensions
using Profile
using CuYao

n = 18
depth = 1000

# Build the circuit and dispatch `:random` onto it
# (presumably this randomises the circuit parameters -- confirm against the Yao docs).
circuit = dispatch!(variational_circuit(n, depth), :random)

# NOTE: the results of these two calls are only displayed in an interactive
# session; when the file is run as a script they are discarded.
gatecount(circuit)
nparameters(circuit)

h = heisenberg(n)
cureg = cu(zero_state(n))

# `@time` on a first call also counts compilation; it reports that share
# separately as "compilation time".
@time expect(h, cureg => circuit)
@time _, grad = expect'(h, cureg => circuit)

# for 14 qubits, it is around 6 times difference
# for 12, around 4 times
# for 16

# The Profile stdlib macro is the lowercase `@profile`; `@Profile` is not defined.
# The call is profiled without assigning its result: `grad` is already a global
# from the `@time` line above, and assigning to it inside the macro triggers the
# "Assignment to `grad` in soft scope is ambiguous" warning when run as a script.
@profile expect'(h, cureg => circuit)
Profile.print(format=:flat)
"""
And here is its output, including the profile:
nohup: ignoring input
26.439291 seconds (52.09 M allocations: 2.923 GiB, 4.33% gc time, 17.06% compilation time)
18.178448 seconds (42.04 M allocations: 2.142 GiB, 6.94% gc time, 3.21% compilation time)
Count Overhead File Line Function
===== ======== ==== ==== ========
19 19 @Base/Base.jl 33 getproperty
3600 0 @Base/Base.jl 386 include(mod::Module, _path::String)
3 3 @Base/Base.jl 34 setproperty!
12 12 @Base/Base.jl 65 time_ns
1 0 @Base/abstractarray.jl 1197 _getindex
1 0 @Base/abstractarray.jl 70 axes
2 0 @Base/abstractarray.jl 89 axes
1349 0 @Base/abstractarray.jl 1056 copyto_axcheck!
1 0 @Base/abstractarray.jl 1170 getindex
1 0 @Base/abstractarray.jl 250 length
136 0 @Base/abstractarray.jl 784 similar
136 0 @Base/abstractarray.jl 785 similar
7 2 @Base/abstractarraymath.jl 70 #dropdims#187
1 1 @Base/abstractarraymath.jl 71 _dropdims(A::CUDA.CuArray{ComplexF64, 2, CUDA.Mem....
1 0 @Base/abstractarraymath.jl 74 _dropdims(A::CUDA.CuArray{ComplexF64, 2, CUDA.Mem....
3 0 @Base/abstractarraymath.jl 85 _dropdims(A::CUDA.CuArray{ComplexF64, 2, CUDA.Mem....
7 0 @Base/abstractarraymath.jl 70 dropdims##kw
24 0 @Base/abstractarraymath.jl 41 vec
32 0 @Base/abstractdict.jl 17 haskey
1 0 @Base/array.jl 1643 #reverse#91
1353 0 @Base/array.jl 540 Array
31 0 @Base/array.jl 670 _array_for
5 5 @Base/array.jl 884 _growend!
39 0 @Base/array.jl 449 fill
39 0 @Base/array.jl 451 fill
1 0 @Base/array.jl 335 fill!
2 2 @Base/array.jl ? get!(constructor::CUDA.var"#192#193", x::CUDA.PerD...
2 2 @Base/array.jl ? getindex
9 0 @Base/array.jl 391 getindex
20 20 @Base/array.jl 801 getindex
1 1 @Base/array.jl 205 isassigned
5 1 @Base/array.jl 777 iterate
5 0 @Base/array.jl 929 push!
1 0 @Base/array.jl 1643 reverse
2 2 @Base/array.jl 839 setindex!
1 1 @Base/array.jl 839 setindex!(A::Vector{Float64}, x::Float64, i1::Int64)
1 1 @Base/array.jl ? task_local_state!()
31 0 @Base/array.jl 108 vect
3 0 @Base/arraymath.jl 52 *(A::ComplexF64, B::Vector{ComplexF64})
2 0 @Base/arraymath.jl 55 *(A::Vector{ComplexF64}, B::ComplexF64)
10 0 @Base/arraymath.jl 39 -(A::LinearAlgebra.Diagonal{ComplexF64, Vector{Com...
68 0 @Base/arraymath.jl 30 conj(A::CUDA.CuArray{ComplexF64, 1, CUDA.Mem.Devic...
1 0 @Base/asyncevent.jl 21 Base.AsyncCondition()
1 0 @Base/asyncevent.jl 45 AsyncCondition
1 0 @Base/asyncevent.jl 101 _trywait(t::Timer)
1 0 @Base/asyncevent.jl 104 _trywait(t::Base.AsyncCondition)
2044 0 @Base/asyncevent.jl 111 _trywait(t::Timer)
3 0 @Base/asyncevent.jl 113 _trywait(t::Timer)
1 0 @Base/asyncevent.jl 171 _uv_hook_close(t::Timer)
1 0 @Base/asyncevent.jl 47 macro expansion
1072 20 @Base/asyncevent.jl 129 wait
6 6 @Base/atomics.jl 405 atomic_add!
1 1 @Base/atomics.jl 358 getindex
1 0 @Base/bool.jl 106 *
84 68 @Base/boot.jl 448 Array
4 4 @Base/boot.jl 450 Array
75 0 @Base/boot.jl 457 Array
4 0 @Base/boot.jl 458 Array
4 4 @Base/boot.jl 460 Array
10 0 @Base/boot.jl 465 Array
9 0 @Base/boot.jl 467 Array
1353 0 @Base/boot.jl 472 Array
3600 2 @Base/boot.jl 360 eval
2 2 @Base/boot.jl 362 kwfunc(f::Any)
3 0 @Base/broadcast.jl 621 _broadcast_getindex
3 2 @Base/broadcast.jl 648 _broadcast_getindex_evalf
1 0 @Base/broadcast.jl 207 axes
83 0 @Base/broadcast.jl 872 broadcast_preserving_zero_d
1 0 @Base/broadcast.jl 484 combine_axes
15 0 @Base/broadcast.jl 908 copy
3 0 @Base/broadcast.jl 930 copy
4 0 @Base/broadcast.jl 936 copyto!
4 0 @Base/broadcast.jl 983 copyto!
2 0 @Base/broadcast.jl 1078 copyto_nonleaf!(dest::Vector{Float64}, bc::Base.Br...
1 0 @Base/broadcast.jl 1080 copyto_nonleaf!(dest::Vector{Float64}, bc::Base.Br...
3 0 @Base/broadcast.jl 575 getindex
1 0 @Base/broadcast.jl 266 instantiate
2 0 @Base/broadcast.jl 984 macro expansion
291 0 @Base/broadcast.jl 883 materialize
10 0 @Base/broadcast.jl 196 similar
10 0 @Base/broadcast.jl 197 similar
3600 0 @Base/client.jl 485 _start()
3600 0 @Base/client.jl 285 exec_options(opts::Base.JLOptions)
2 0 @Base/complex.jl 277 *
1 0 @Base/complex.jl 333 /
1 1 @Base/complex.jl 12 Complex
1 1 @Base/complex.jl 849 cos(z::ComplexF64)
1 0 @Base/complex.jl 863 cos(z::ComplexF64)
5 0 @Base/complex.jl 864 cos(z::ComplexF64)
1 0 @Base/complex.jl 844 sin(z::ComplexF64)
2 0 @Base/condition.jl 124 #notify#515
15 1 @Base/condition.jl 73 lock
2 0 @Base/condition.jl 124 notify
2 0 @Base/condition.jl 130 notify(c::Base.GenericCondition{Base.Threads.SpinL...
2 0 @Base/condition.jl 74 unlock
3081 0 @Base/condition.jl 106 wait(c::Base.GenericCondition{ReentrantLock})
6 0 @Base/dict.jl 505 get
2 0 @Base/dict.jl 506 get(h::Dict{UInt64, Any}, key::UInt64, default::No...
3 0 @Base/dict.jl 169 hashindex
1 1 @Base/dict.jl ? ht_keyindex(h::Dict{UInt64, Any}, key::UInt64)
1 0 @Base/dict.jl 283 ht_keyindex(h::Dict{UInt64, Any}, key::UInt64)
3 0 @Base/dict.jl 284 ht_keyindex(h::Dict{CUDA.CuContext, Dict{UInt64, A...
1 0 @Base/dict.jl 288 ht_keyindex(h::Dict{CUDA.CuContext, Dict{UInt64, A...
1 1 @Base/dict.jl 292 ht_keyindex(h::Dict{CUDA.CuContext, Dict{UInt64, A...
1 0 @Base/dict.jl 171 isslotempty
1 0 @Base/div.jl 229 cld
1 0 @Base/div.jl 273 div
40 40 @Base/env.jl 38 _getenv(var::String)
40 0 @Base/env.jl 42 access_env
40 0 @Base/env.jl 80 get
2 0 @Base/essentials.jl 602 isempty
1 0 @Base/essentials.jl 767 isempty
7 7 @Base/float.jl 332 *
2 2 @Base/float.jl 326 +
1 1 @Base/float.jl 365 ==
1 0 @Base/float.jl 401 ==
2 2 @Base/float.jl 339 muladd
2 0 @Base/float.jl 296 round
2 2 @Base/float.jl 307 round
1 0 @Base/floatfuncs.jl 281 #isapprox#659
1 1 @Base/floatfuncs.jl 5 copysign
1 0 @Base/floatfuncs.jl 281 isapprox
5 5 @Base/gcutils.jl 121 enable_finalizers
6 6 @Base/gcutils.jl 48 finalizer
1 0 @Base/generator.jl 44 iterate
4 0 @Base/hashing.jl 18 hash
38 0 @Base/hashing.jl 23 hash
8 0 @Base/hashing.jl 71 hash
4 0 @Base/hashing.jl 72 hash
8 0 @Base/hashing.jl 73 hash
1 0 @Base/hashing.jl 29 hash_64_64
5 0 @Base/hashing.jl 30 hash_64_64
1 0 @Base/hashing.jl 31 hash_64_64
3 0 @Base/hashing.jl 32 hash_64_64
2 0 @Base/hashing.jl 33 hash_64_64
4 0 @Base/hashing.jl 34 hash_64_64
1 0 @Base/hashing.jl 35 hash_64_64
7 0 @Base/hashing.jl 63 hash_uint
10 0 @Base/hashing.jl 62 hash_uint64
33 31 @Base/iddict.jl 87 get
26 26 @Base/iddict.jl 92 getindex
32 0 @Base/iddict.jl 174 in
2 2 @Base/int.jl 88 *
1 0 @Base/int.jl 923 *
4 4 @Base/int.jl 87 +
1 0 @Base/int.jl 921 +
2 2 @Base/int.jl 86 -
4 4 @Base/int.jl 83 <
3 3 @Base/int.jl 441 <
3 0 @Base/int.jl 448 <
1 1 @Base/int.jl 457 <<
1 0 @Base/int.jl 464 <<
8 8 @Base/int.jl 456 >>
8 0 @Base/int.jl 462 >>
2 2 @Base/int.jl 262 rem
1 1 @Base/int.jl 477 rem
4 4 @Base/int.jl 333 xor
1 1 @Base/int.jl 285 ~
1 0 @Base/iterators.jl 110 iterate
2 0 @Base/iterators.jl 158 iterate
2 0 @Base/iterators.jl 159 iterate
1 0 @Base/iterators.jl 449 iterate
1 0 @Base/iterators.jl 116 reverse
1 1 @Base/libc.jl 355 malloc
4 4 @Base/libuv.jl 48 iolock_begin
1 0 @Base/libuv.jl 57 preserve_handle
505 505 @Base/libuv.jl 104 process_events
1 0 @Base/linked_list.jl 113 list_deletefirst!
1 0 @Base/linked_list.jl 86 popfirst!(q::Base.InvasiveLinkedList{Task})
3600 0 @Base/loading.jl 1148 _include(mapexpr::Function, mod::Module, _path::St...
3600 0 @Base/loading.jl 1094 include_string(mapexpr::typeof(identity), mod::Mod...
1 1 @Base/lock.jl 332 Event
3 3 @Base/lock.jl 86 lock(rl::ReentrantLock)
13 0 @Base/lock.jl 91 lock(rl::ReentrantLock)
1 1 @Base/lock.jl 108 lock(rl::ReentrantLock)
1 1 @Base/lock.jl 348 notify(e::Base.Event)
1 0 @Base/lock.jl 353 notify(e::Base.Event)
3 3 @Base/lock.jl 119 unlock(rl::ReentrantLock)
2 0 @Base/lock.jl 127 unlock(rl::ReentrantLock)
2 0 @Base/lock.jl 138 unlock(rl::ReentrantLock)
3 0 @Base/lock.jl 139 unlock(rl::ReentrantLock)
1037 0 @Base/lock.jl 340 wait(e::Base.Event)
1 1 @Base/locks-mt.jl 43 _get
1 1 @Base/locks-mt.jl 50 _set!
4 4 @Base/locks-mt.jl 36 _xchg!
9 9 @Base/locks-mt.jl 61 lock(l::Base.Threads.SpinLock)
3 0 @Base/locks-mt.jl 63 lock(l::Base.Threads.SpinLock)
4 0 @Base/locks-mt.jl 65 lock(l::Base.Threads.SpinLock)
25 0 @Base/locks-mt.jl 66 lock(l::Base.Threads.SpinLock)
2 2 @Base/locks-mt.jl 89 unlock(l::Base.Threads.SpinLock)
1 0 @Base/locks-mt.jl 91 unlock(l::Base.Threads.SpinLock)
3 0 @Base/locks-mt.jl 92 unlock(l::Base.Threads.SpinLock)
3 3 @Base/locks-mt.jl 94 unlock(l::Base.Threads.SpinLock)
2 2 @Base/logging.jl 477 current_logger_for_env(std_level::Base.CoreLogging...
1 0 @Base/logging.jl 478 current_logger_for_env(std_level::Base.CoreLogging...
50 2 @Base/logging.jl 479 current_logger_for_env(std_level::Base.CoreLogging...
1 1 @Base/logging.jl 473 current_logstate
2 2 @Base/logging.jl 519 env_override_minlevel(group::Symbol, _module::Module)
40 0 @Base/logging.jl 520 env_override_minlevel(group::Symbol, _module::Module)
5 5 @Base/logging.jl 521 env_override_minlevel(group::Symbol, _module::Module)
1 0 @Base/logging.jl 540 env_override_minlevel(group::Symbol, _module::Module)
54 2 @Base/logging.jl 356 macro expansion
1 1 @Base/logging.jl 357 macro expansion
2 0 @Base/math.jl 131 evalpoly
4 0 @Base/math.jl 407 log
2 0 @Base/math.jl 132 macro expansion
1 1 @Base/multidimensional.jl 399 __inc
1 0 @Base/multidimensional.jl 380 iterate
344 0 none ? #call#239
275 4 none ? call(::CUDA.HostKernel{CuYao.var"#kernel#26"{BitBa...
344 0 none ? call##kw
391 95 none ? (::CUDA.var"#call##kw")(::NamedTuple{(:threads, :b...
615 0 none ? convert_arguments
615 0 none ? macro expansion
526 2 none ? pack_arguments(::CUDA.var"#39#40"{Bool, Int64, CUD...
2 0 @Base/operators.jl 204 !=
7 0 @Base/operators.jl 560 *
3 0 @Base/operators.jl 560 +
8 0 @Base/operators.jl 938 ComposedFunction
1 0 @Base/operators.jl 1131 in
1354 0 @Base/operators.jl 858 |>
31 31 @Base/promotion.jl 410 ==
1 0 @Base/range.jl 22 Colon
1 1 @Base/range.jl 208 StepRange
1 0 @Base/range.jl 263 StepRange
1 0 @Base/range.jl 24 _colon
1 0 @Base/range.jl 1045 _reverse
6 0 @Base/reduce.jl 160 #mapfoldl#214
6 0 @Base/reduce.jl 287 #mapreduce#218
6 0 @Base/reduce.jl 456 #reduce#220
1 0 @Base/reduce.jl 880 _any
1 0 @Base/reduce.jl 56 _foldl_impl(op::Base.FilteringRF{BitBasis.var"#35#...
1 0 @Base/reduce.jl 415 _mapreduce(f::YaoBlocks.var"#117#118"{ArrayReg{2, ...
1 1 @Base/reduce.jl 144 _xfadjoint
1 0 @Base/reduce.jl 871 any
1 0 @Base/reduce.jl 48 foldl_impl(op::Base.FilteringRF{BitBasis.var"#35#3...
1 1 @Base/reduce.jl 53 getproperty
1 0 @Base/reduce.jl 245 macro expansion
6 0 @Base/reduce.jl 160 mapfoldl
3 0 @Base/reduce.jl 43 mapfoldl_impl(f::typeof(identity), op::typeof(+), ...
3 2 @Base/reduce.jl 44 mapfoldl_impl(f::typeof(identity), op::typeof(+), ...
6 0 @Base/reduce.jl 287 mapreduce
1 0 @Base/reduce.jl 243 mapreduce_impl(f::YaoBlocks.var"#117#118"{ArrayReg...
1 0 @Base/reduce.jl 257 mapreduce_impl
6 0 @Base/reduce.jl 456 reduce
793 0 @Base/reducedim.jl 899 #_sum#701
793 0 @Base/reducedim.jl 900 #_sum#702
1 0 @Base/reducedim.jl 310 #mapreduce#672
793 0 @Base/reducedim.jl 873 #sum#679
1 0 @Base/reducedim.jl 318 _mapreduce_dim
793 0 @Base/reducedim.jl 899 _sum
793 0 @Base/reducedim.jl 900 _sum
1 0 @Base/reducedim.jl 310 mapreduce
793 0 @Base/reducedim.jl 873 sum##kw
31 31 @Base/reflection.jl 291 objectid
4 0 @Base/refpointer.jl 135 Ref
4 4 @Base/refvalue.jl 7 RefValue
1 0 @Base/refvalue.jl 56 getindex
24 0 @Base/reshapedarray.jl 116 reshape
1 1 @Base/simdloop.jl ? macro expansion
3 0 @Base/simdloop.jl 77 macro expansion
1 0 @Base/simdloop.jl 79 macro expansion
1 1 @Base/sort.jl 729 sort!(v::Vector{Int64}; alg::Base.Sort.QuickSortAl...
3 2 @Base/sort.jl 747 sort!(v::Vector{Int64}; alg::Base.Sort.QuickSortAl...
1 1 @Base/sort.jl 597 sort!(v::Vector{Int64}, lo::Int64, hi::Int64, a::B...
4 0 @Base/sort.jl 735 sort!
1 1 @Base/special/hyperbolic.jl 119 cosh(x::Float64)
3 0 @Base/special/hyperbolic.jl 82 sinh(x::Float64)
1 0 @Base/special/hyperbolic.jl 48 sinh_kernel(x::Float64)
2 0 @Base/special/hyperbolic.jl 50 sinh_kernel(x::Float64)
1 0 @Base/special/log.jl 275 log(x::Float64)
3 0 @Base/special/log.jl 279 log(x::Float64)
1 0 @Base/special/log.jl 158 log_proc1
1 0 @Base/special/log.jl 171 log_proc1
1 0 @Base/special/log.jl 178 log_proc1
1 0 @Base/special/trig.jl 81 sin_kernel
1 0 @Base/special/trig.jl 177 sincos(x::Float64)
1 0 @Base/special/trig.jl 209 sincos_kernel
998 0 @Base/task.jl 406 (::Base.var"#578#579"{CUDA.var"#47#48"{CUDA.var"#1...
2 0 @Base/task.jl 642 schedule(t::Task, arg::Any; error::Bool)
4 4 @Base/task.jl 126 current_task
3 3 @Base/task.jl 572 enq_work(t::Task)
3 3 @Base/task.jl 573 enq_work(t::Task)
3 0 @Base/task.jl 574 enq_work(t::Task)
52 5 @Base/task.jl 585 enq_work(t::Task)
68 68 @Base/task.jl 595 enq_work(t::Task)
6 0 @Base/task.jl 243 get_task_tls
7 7 @Base/task.jl 246 get_task_tls
6 6 @Base/task.jl 165 getproperty
47 0 @Base/task.jl 382 macro expansion
1 1 @Base/task.jl ? popfirst!(W::Base.InvasiveLinkedListSynchronized{T...
11 11 @Base/task.jl 541 popfirst!(W::Base.InvasiveLinkedListSynchronized{T...
13 2 @Base/task.jl 542 popfirst!(W::Base.InvasiveLinkedListSynchronized{T...
6 6 @Base/task.jl 543 popfirst!(W::Base.InvasiveLinkedListSynchronized{T...
6 5 @Base/task.jl 544 popfirst!(W::Base.InvasiveLinkedListSynchronized{T...
3 0 @Base/task.jl 546 popfirst!(W::Base.InvasiveLinkedListSynchronized{T...
1 1 @Base/task.jl 752 poptask(W::Base.InvasiveLinkedListSynchronized{Task})
39 0 @Base/task.jl 753 poptask(W::Base.InvasiveLinkedListSynchronized{Task})
1 1 @Base/task.jl 754 poptask(W::Base.InvasiveLinkedListSynchronized{Task})
92430 92427 @Base/task.jl 755 poptask(W::Base.InvasiveLinkedListSynchronized{Task})
1 1 @Base/task.jl 758 poptask(W::Base.InvasiveLinkedListSynchronized{Task})
6 6 @Base/task.jl 515 push!(W::Base.InvasiveLinkedListSynchronized{Task}...
16 1 @Base/task.jl 516 push!(W::Base.InvasiveLinkedListSynchronized{Task}...
8 8 @Base/task.jl 517 push!(W::Base.InvasiveLinkedListSynchronized{Task}...
6 6 @Base/task.jl 518 push!(W::Base.InvasiveLinkedListSynchronized{Task}...
5 2 @Base/task.jl 520 push!(W::Base.InvasiveLinkedListSynchronized{Task}...
2 2 @Base/task.jl 522 push!(W::Base.InvasiveLinkedListSynchronized{Task}...
47 0 @Base/task.jl 599 schedule
2 0 @Base/task.jl 633 schedule##kw
89352 0 @Base/task.jl 489 task_done_hook(t::Task)
16 0 @Base/task.jl 241 task_local_storage
3 2 @Base/task.jl 237 threadid
4 4 @Base/task.jl 693 try_yieldto(undo::typeof(Base.ensure_rescheduled))
12 12 @Base/task.jl 694 try_yieldto(undo::typeof(Base.ensure_rescheduled))
5 5 @Base/task.jl 695 try_yieldto(undo::typeof(Base.ensure_rescheduled))
1 0 @Base/task.jl 700 try_yieldto(undo::typeof(Base.ensure_rescheduled))
2 0 @Base/task.jl 708 try_yieldto(undo::typeof(Base.ensure_rescheduled))
1 1 @Base/task.jl 738 trypoptask(W::Base.InvasiveLinkedListSynchronized{...
39 2 @Base/task.jl 739 trypoptask
1 0 @Base/task.jl 762 wait
92472 2 @Base/task.jl 763 wait
23 0 @Base/task.jl 764 wait
505 0 @Base/task.jl 765 wait
1 1 @Base/task.jl 653 yield()
82 2 @Base/task.jl 655 yield()
9 9 @Base/task.jl 656 yield()
568 0 @Base/task.jl 657 yield()
1072 0 @Base/threadingconstructs.jl 169 (::CUDA.var"#14#17"{CUDA.CuStream, Timer, CUDA.CuD...
1038 0 @Base/threadingconstructs.jl 169 (::CUDA.var"#15#18"{Timer, Base.Event})()
47 0 @Base/threadingconstructs.jl 178 macro expansion
6 0 @Base/timing.jl 286 macro expansion
235 0 @Base/timing.jl 287 macro expansion
6 0 @Base/timing.jl 288 macro expansion
46 0 @Base/tuple.jl 303 Tuple
46 41 @Base/tuple.jl 334 _totuple
1 1 @Base/tuple.jl 29 getindex
2 2 @Base/tuple.jl 86 indexed_iterate
1 0 @Base/tuple.jl 66 iterate
1 1 @Base/tuple.jl 213 map
3 2 @Base/tuple.jl 214 map
1 0 @Base/tuple.jl 215 map
1 1 @Base/tuple.jl 216 map(f::typeof(CUDA.cudaconvert), t::Tuple{typeof(i...
1 0 @Base/tuple.jl 216 map
1 0 @Base/tuple.jl 480 prod
3 0 @Base/util.jl 450 #PTXCompilerTarget#11
3 0 @Base/util.jl 450 Type##kw
1 1 @Base/version.jl ? hash(v::VersionNumber, h::UInt64)
1 1 @Base/version.jl 199 hash(v::VersionNumber, h::UInt64)
4 0 @Base/version.jl 201 hash(v::VersionNumber, h::UInt64)
1 0 @Base/version.jl 203 hash(v::VersionNumber, h::UInt64)
3 0 ....6/LinearAlgebra/src/diagonal.jl 174 *
4 0 ....6/LinearAlgebra/src/diagonal.jl 157 -
1 0 ...gebra/src/structuredbroadcast.jl 123 fzero
1 0 ...gebra/src/structuredbroadcast.jl 110 fzeropreserving
1 0 ...gebra/src/structuredbroadcast.jl 128 similar
4 0 ...gebra/src/structuredbroadcast.jl 131 similar
1 1 @Adapt/src/Adapt.jl 40 adapt(to::CUDA.Adaptor, x::Function)
4 0 @Adapt/src/Adapt.jl 40 adapt
1 0 @Adapt/src/base.jl 31 #7
3 1 @Adapt/src/base.jl 11 adapt_structure
1 0 @Adapt/src/base.jl 30 adapt_structure
1 0 @Adapt/src/base.jl 30 adapt_structure(to::CUDA.Adaptor, bc::Base.Broadca...
4 0 @BitBasis/src/iterate_control.jl 107 group_shift!(nbits::Int64, positions::Vector{Int64})
5 0 @BitBasis/src/iterate_control.jl 108 group_shift!(nbits::Int64, positions::Vector{Int64})
4 0 @BitBasis/src/iterate_control.jl 109 group_shift!(nbits::Int64, positions::Vector{Int64})
4 0 @BitBasis/src/iterate_control.jl 115 group_shift!(nbits::Int64, positions::Vector{Int64})
2 0 @BitBasis/src/iterate_control.jl 116 group_shift!(nbits::Int64, positions::Vector{Int64})
1 1 @BitBasis/src/iterate_control.jl 53 itercontrol(nbits::Int64, positions::Vector{Int64}...
7 0 @BitBasis/src/iterate_control.jl 54 itercontrol(nbits::Int64, positions::Vector{Int64}...
19 0 @BitBasis/src/iterate_control.jl 55 itercontrol(nbits::Int64, positions::Vector{Int64}...
58 12 @BitBasis/src/iterate_control.jl 57 itercontrol(nbits::Int64, positions::Vector{Int64}...
1 0 @BitBasis/src/iterate_control.jl 80 length
1 0 @BitBasis/src/operations.jl 107 bmask
7 0 @BitBasis/src/operations.jl 110 bmask
1 0 @BitBasis/src/operations.jl 114 bmask
10 0 @CUDA/lib/cudadrv/devices.jl 175 attribute
1 0 @CUDA/lib/cudadrv/devices.jl 65 convert
2 0 @CUDA/lib/cudadrv/devices.jl 23 current_device
5 0 @CUDA/lib/cudadrv/devices.jl 186 warpsize
67 0 @CUDA/lib/cudadrv/error.jl 80 initialize_context
1134 0 @CUDA/lib/cudadrv/error.jl 97 macro expansion
505 2 @CUDA/lib/cudadrv/execution.jl 69 #39
615 8 @CUDA/lib/cudadrv/execution.jl 136 #44
1 0 @CUDA/lib/cudadrv/execution.jl 147 #47
615 0 @CUDA/lib/cudadrv/execution.jl 135 #cudacall#43
7 7 @CUDA/lib/cudadrv/execution.jl 52 launch(::CUDA.CuFunction, ::CUDA.KernelState, ::Fu...
470 0 @CUDA/lib/cudadrv/execution.jl 62 #launch#38
67 11 @CUDA/lib/cudadrv/execution.jl 62 launch(::CUDA.CuFunction, ::CUDA.KernelState, ::Fu...
1 0 @CUDA/lib/cudadrv/execution.jl 146 #launch#46
5 0 @CUDA/lib/cudadrv/execution.jl 159 #launch#46
615 0 @CUDA/lib/cudadrv/execution.jl 135 cudacall##kw
11 11 @CUDA/lib/cudadrv/execution.jl 52 (::CUDA.var"#launch##kw")(::NamedTuple{(:threads, ...
595 24 @CUDA/lib/cudadrv/execution.jl 55 (::CUDA.var"#launch##kw")(::NamedTuple{(:threads, ...
6 0 @CUDA/lib/cudadrv/execution.jl 146 launch##kw
19 0 @CUDA/lib/cudadrv/execution.jl 32 macro expansion
505 0 @CUDA/lib/cudadrv/execution.jl 33 macro expansion
615 0 @CUDA/lib/cudadrv/execution.jl 95 macro expansion
10 10 @CUDA/lib/cudadrv/libcuda.jl 59 macro expansion
13 13 @CUDA/lib/cudadrv/libcuda.jl 144 macro expansion
1 0 @CUDA/lib/cudadrv/libcuda.jl 597 macro expansion
214 214 @CUDA/lib/cudadrv/libcuda.jl 598 macro expansion
2 0 @CUDA/lib/cudadrv/libcuda.jl 1031 macro expansion
29 29 @CUDA/lib/cudadrv/libcuda.jl 1032 macro expansion
7 0 @CUDA/lib/cudadrv/libcuda.jl 1226 macro expansion
494 494 @CUDA/lib/cudadrv/libcuda.jl 1227 macro expansion
5 5 @CUDA/lib/cudadrv/libcuda.jl 1254 macro expansion
18 0 @CUDA/lib/cudadrv/libcuda.jl 1633 macro expansion
149 140 @CUDA/lib/cudadrv/libcuda.jl 1634 macro expansion
1 0 @CUDA/lib/cudadrv/libcuda.jl 2221 macro expansion
84 84 @CUDA/lib/cudadrv/libcuda.jl 2222 macro expansion
4 0 @CUDA/lib/cudadrv/libcuda.jl 2266 macro expansion
┌ Warning: Assignment to `grad` in soft scope is ambiguous because a global variable by the same name exists: `grad` will be treated as a new local. Disambiguate by using `local grad` to suppress this warning or `global grad` to assign to the existing global variable.
└ @ /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.6/Profile/src/Profile.jl:28
113 113 @CUDA/lib/cudadrv/libcuda.jl 2267 macro expansion
2 2 @CUDA/lib/cudadrv/libcuda.jl 150 unsafe_cuCtxGetDevice
1 0 @CUDA/lib/cudadrv/libcuda.jl 882 unsafe_cuPointerGetAttribute(data::Base.RefValue{U...
8 8 @CUDA/lib/cudadrv/libcuda.jl 883 unsafe_cuPointerGetAttribute(data::Base.RefValue{U...
2 2 @CUDA/lib/cudadrv/libcuda.jl 1023 unsafe_cuStreamQuery(hStream::CUDA.CuStream)
33 0 @CUDA/lib/cudadrv/libcuda.jl 1024 unsafe_cuStreamQuery(hStream::CUDA.CuStream)
308 308 @CUDA/lib/cudadrv/libcuda.jl 1025 unsafe_cuStreamQuery(hStream::CUDA.CuStream)
2 0 @CUDA/lib/cudadrv/memory.jl 77 #alloc#1
117 0 @CUDA/lib/cudadrv/memory.jl 83 #alloc#1
1 0 @CUDA/lib/cudadrv/memory.jl 89 #alloc#1
85 0 @CUDA/lib/cudadrv/memory.jl 97 #free#2
216 0 @CUDA/lib/cudadrv/memory.jl 397 #unsafe_copyto!#8
120 0 @CUDA/lib/cudadrv/memory.jl 75 alloc##kw
85 0 @CUDA/lib/cudadrv/memory.jl 93 free##kw
10 0 @CUDA/lib/cudadrv/memory.jl 849 is_pinned
219 0 @CUDA/lib/cudadrv/memory.jl 397 (::Base.var"#unsafe_copyto!##kw")(::NamedTuple{(:a...
1 0 ...A/lib/cudadrv/module/function.jl 27 hash(fun::CUDA.CuFunction, h::UInt64)
10 2 @CUDA/lib/cudadrv/occupancy.jl 61 (::CUDA.var"#52#54"{ComposedFunction{CUDA.var"#com...
1 0 @CUDA/lib/cudadrv/occupancy.jl 56 launch_configuration(fun::CUDA.CuFunction; shmem::...
1 0 @CUDA/lib/cudadrv/occupancy.jl 57 launch_configuration(fun::CUDA.CuFunction; shmem::...
85 2 @CUDA/lib/cudadrv/occupancy.jl 59 #launch_configuration#51
1 1 @CUDA/lib/cudadrv/occupancy.jl 61 launch_configuration(fun::CUDA.CuFunction; shmem::...
98 98 @CUDA/lib/cudadrv/occupancy.jl 62 launch_configuration(fun::CUDA.CuFunction; shmem::...
89 4 @CUDA/lib/cudadrv/occupancy.jl 63 launch_configuration(fun::CUDA.CuFunction; shmem::...
35 0 @CUDA/lib/cudadrv/occupancy.jl 56 launch_configuration
241 1 @CUDA/lib/cudadrv/occupancy.jl 56 launch_configuration##kw
2 0 @CUDA/lib/cudadrv/state.jl 162 context!(f::CUDA.var"#210#211"{CUDA.CuArray{Comple...
2 2 @CUDA/lib/cudadrv/state.jl 163 context!(f::CUDA.var"#210#211"{CUDA.CuArray{Comple...
1 1 @CUDA/lib/cudadrv/state.jl 163 #context!#63
107 1 @CUDA/lib/cudadrv/state.jl 164 context!(f::CUDA.var"#210#211"{CUDA.CuArray{Comple...
1346 1 @CUDA/lib/cudadrv/state.jl 164 #context!#63
26 2 @CUDA/lib/cudadrv/state.jl 106 active_state
7 0 @CUDA/lib/cudadrv/state.jl 107 active_state
3 0 @CUDA/lib/cudadrv/state.jl 122 context
2 0 @CUDA/lib/cudadrv/state.jl 140 context!(ctx::CUDA.CuContext)
1347 0 @CUDA/lib/cudadrv/state.jl 161 context!
112 1 @CUDA/lib/cudadrv/state.jl 161 (::CUDA.var"#context!##kw")(::NamedTuple{(:skip_de...
6 1 @CUDA/lib/cudadrv/state.jl 185 device
3 2 @CUDA/lib/cudadrv/state.jl 193 device_context
1 1 @CUDA/lib/cudadrv/state.jl 189 device_contexts()
1 0 @CUDA/lib/cudadrv/state.jl 314 deviceid
2 2 @CUDA/lib/cudadrv/state.jl 440 get!(constructor::CUDA.var"#192#193", x::CUDA.PerD...
3 0 @CUDA/lib/cudadrv/state.jl 443 get!(constructor::CUDA.var"#192#193", x::CUDA.PerD...
1 1 @CUDA/lib/cudadrv/state.jl 422 get_values(x::CUDA.PerDevice{Base.RefValue{Union{N...
54 8 @CUDA/lib/cudadrv/state.jl 88 prepare_cuda_state
13 0 @CUDA/lib/cudadrv/state.jl 94 prepare_cuda_state
18 1 @CUDA/lib/cudadrv/state.jl 346 stream
5 0 @CUDA/lib/cudadrv/state.jl 347 stream
4 4 @CUDA/lib/cudadrv/state.jl 350 stream
1 0 @CUDA/lib/cudadrv/state.jl 80 task_local_state()
1 0 @CUDA/lib/cudadrv/state.jl 81 task_local_state()
13 13 @CUDA/lib/cudadrv/state.jl 68 task_local_state!()
16 0 @CUDA/lib/cudadrv/state.jl 69 task_local_state!()
31 0 @CUDA/lib/cudadrv/state.jl 70 task_local_state!()
34 1 @CUDA/lib/cudadrv/state.jl 71 task_local_state!()
1 1 @CUDA/lib/cudadrv/state.jl 73 task_local_state!()
8 8 @CUDA/lib/cudadrv/state.jl 60 validate_task_local_state
1 0 @CUDA/lib/cudadrv/stream.jl 159 #13
1 1 @CUDA/lib/cudadrv/stream.jl 121 synchronize(stream::CUDA.CuStream; blocking::Nothing)
38 0 @CUDA/lib/cudadrv/stream.jl 128 synchronize(stream::CUDA.CuStream; blocking::Nothing)
31 0 @CUDA/lib/cudadrv/stream.jl 132 synchronize(stream::CUDA.CuStream; blocking::Nothing)
2 0 @CUDA/lib/cudadrv/stream.jl 134 synchronize(stream::CUDA.CuStream; blocking::Nothing)
2 2 @CUDA/lib/cudadrv/stream.jl 32 default_stream
349 7 @CUDA/lib/cudadrv/stream.jl 103 isdone
1 1 @CUDA/lib/cudadrv/stream.jl 104 isdone
34 0 @CUDA/lib/cudadrv/stream.jl 165 macro expansion
1072 0 @CUDA/lib/cudadrv/stream.jl 169 macro expansion
13 0 @CUDA/lib/cudadrv/stream.jl 182 macro expansion
1038 0 @CUDA/lib/cudadrv/stream.jl 183 macro expansion
49 0 @CUDA/lib/cudadrv/stream.jl 139 nonblocking_synchronize
9 9 @CUDA/lib/cudadrv/stream.jl 148 nonblocking_synchronize
668 4 @CUDA/lib/cudadrv/stream.jl 150 nonblocking_synchronize
301 0 @CUDA/lib/cudadrv/stream.jl 152 nonblocking_synchronize
1 0 @CUDA/lib/cudadrv/stream.jl 157 nonblocking_synchronize
6 0 @CUDA/lib/cudadrv/stream.jl 158 nonblocking_synchronize
47 0 @CUDA/lib/cudadrv/stream.jl 164 nonblocking_synchronize
72 0 @CUDA/lib/cudadrv/stream.jl 122 synchronize
13 0 @CUDA/lib/utils/call.jl 26 cuCtxGetCurrent
10 0 @CUDA/lib/utils/call.jl 26 cuDeviceGetAttribute
5 0 @CUDA/lib/utils/call.jl 26 cuLaunchHostFunc(hStream::CUDA.CuStream, fn::Ptr{N...
503 2 @CUDA/lib/utils/call.jl 26 cuLaunchKernel(f::CUDA.CuFunction, gridDimX::UInt3...
117 0 @CUDA/lib/utils/call.jl 26 cuMemAllocAsync(dptr::Base.RefValue{CUDA.CuPtr{Not...
85 0 @CUDA/lib/utils/call.jl 26 cuMemFreeAsync(dptr::CUDA.Mem.DeviceBuffer, hStrea...
216 1 @CUDA/lib/utils/call.jl 26 cuMemcpyDtoHAsync_v2(dstHost::Ptr{ComplexF64}, src...
169 2 @CUDA/lib/utils/call.jl 26 cuOccupancyMaxPotentialBlockSize(minGridSize::Base...
31 0 @CUDA/lib/utils/call.jl 26 cuStreamSynchronize(hStream::CUDA.CuStream)
1 0 @CUDA/lib/utils/memoization.jl 71 macro expansion
4 0 @CUDA/lib/utils/memoization.jl 74 macro expansion
1 1 @CUDA/lib/utils/memoization.jl 76 macro expansion
2 0 @CUDA/lib/utils/threading.jl 25 #get!#6
1 0 @CUDA/lib/utils/threading.jl 28 #get!#6
3 0 @CUDA/lib/utils/threading.jl 25 get!
106 2 @CUDA/src/array.jl 79 #210
1053 0 @CUDA/src/array.jl 410 (::CUDA.var"#217#218"{ComplexF64, Array{ComplexF64...
219 0 @CUDA/src/array.jl 413 (::CUDA.var"#217#218"{ComplexF64, Array{ComplexF64...
72 0 @CUDA/src/array.jl 420 (::CUDA.var"#217#218"{ComplexF64, Array{ComplexF64...
20 0 @CUDA/src/array.jl 42 CuArray
126 0 @CUDA/src/array.jl 42 CUDA.CuArray{ComplexF64, 2, CUDA.Mem.DeviceBuffer}...
7 7 @CUDA/src/array.jl 44 CUDA.CuArray{ComplexF64, 2, CUDA.Mem.DeviceBuffer}...
2 0 @CUDA/src/array.jl 45 CuArray
1 0 @CUDA/src/array.jl 45 CUDA.CuArray{ComplexF64, 2, CUDA.Mem.DeviceBuffer}...
129 0 @CUDA/src/array.jl 125 CuArray
129 0 @CUDA/src/array.jl 136 CuArray
19 0 @CUDA/src/array.jl 50 Type##kw
19 19 @CUDA/src/array.jl 51 _#208
5 0 @CUDA/src/array.jl 727 _derived_array
20 0 @CUDA/src/array.jl 732 _derived_array
3 0 @CUDA/src/array.jl 734 derived_array
1 1 @CUDA/src/array.jl 238 context
1349 1 @CUDA/src/array.jl 360 copyto!
1349 0 @CUDA/src/array.jl 364 copyto!
28 0 @CUDA/src/array.jl 719 reshape(a::CUDA.CuArray{ComplexF64, 2, CUDA.Mem.De...
28 0 @CUDA/src/array.jl 164 similar
1 1 @CUDA/src/array.jl 404 unsafe_copyto!(dest::Array{ComplexF64, 0}, doffs::...
1347 0 @CUDA/src/array.jl 406 unsafe_copyto!(dest::Array{ComplexF64, 0}, doffs::...
135 0 @CUDA/src/array.jl 99 unsafe_finalize!(xs::CUDA.CuArray{ComplexF64, 1, C...
3 3 @CUDA/src/array.jl 68 unsafe_free!(xs::CUDA.CuArray{ComplexF64, 2, CUDA....
5 0 @CUDA/src/array.jl 70 unsafe_free!(xs::CUDA.CuArray{ComplexF64, 2, CUDA....
9 4 @CUDA/src/array.jl 72 unsafe_free!(xs::CUDA.CuArray{ComplexF64, 0, CUDA....
1 0 @CUDA/src/array.jl 76 unsafe_free!(xs::CUDA.CuArray{ComplexF64, 1, CUDA....
1 1 @CUDA/src/array.jl 77 unsafe_free!(xs::CUDA.CuArray{ComplexF64, 3, CUDA....
113 0 @CUDA/src/array.jl 78 unsafe_free!(xs::CUDA.CuArray{ComplexF64, 1, CUDA....
1 1 @CUDA/src/array.jl 86 unsafe_free!(xs::CUDA.CuArray{ComplexF64, 4, CUDA....
95 0 @CUDA/src/broadcast.jl 11 similar
34 0 @CUDA/src/broadcast.jl 14 similar
1 1 @CUDA/src/compiler/exceptions.jl 26 check_exceptions()
1 1 @CUDA/src/compiler/exceptions.jl 38 check_exceptions()
14 14 @CUDA/src/compiler/execution.jl 483 (::CUDA.HostKernel{typeof(CUDA.partial_mapreduce_g...
345 0 @CUDA/src/compiler/execution.jl 484 ##260
555 139 @CUDA/src/compiler/execution.jl 484 (::CUDA.HostKernel{CuYao.var"#kernel#26"{BitBasis....
1 1 @CUDA/src/compiler/execution.jl ? cufunction(f::CuYao.var"#kernel#26"{BitBasis.IterC...
2 2 @CUDA/src/compiler/execution.jl 292 cufunction(f::CuYao.var"#kernel#26"{BitBasis.IterC...
26 0 @CUDA/src/compiler/execution.jl 293 cufunction(f::CuYao.var"#kernel#26"{BitBasis.IterC...
4 0 @CUDA/src/compiler/execution.jl 294 cufunction(f::CuYao.var"#kernel#28", tt::Type{Tupl...
22 0 @CUDA/src/compiler/execution.jl 296 cufunction(f::CuYao.var"#kernel#29", tt::Type{Tupl...
371 28 @CUDA/src/compiler/execution.jl 299 cufunction(f::CuYao.var"#kernel#29", tt::Type{Tupl...
25 19 @CUDA/src/compiler/execution.jl 304 cufunction(f::CuYao.var"#kernel#28", tt::Type{Tupl...
15 13 @CUDA/src/compiler/execution.jl 305 cufunction(f::CuYao.var"#kernel#29", tt::Type{Tupl...
6 6 @CUDA/src/compiler/execution.jl 314 cufunction(f::CuYao.var"#kernel#29", tt::Type{Tupl...
10 10 @CUDA/src/compiler/execution.jl 483 (::Core.var"#Any##kw")(::NamedTuple{(:threads, :bl...
743 6 @CUDA/src/compiler/execution.jl 484 Any##kw
196 19 @CUDA/src/compiler/execution.jl 484 (::Core.var"#Any##kw")(::NamedTuple{(:threads, :bl...
6 1 @CUDA/src/compiler/execution.jl 152 cudaconvert
345 2 @CUDA/src/compiler/execution.jl 293 cufunction
17 0 @CUDA/src/compiler/execution.jl 293 cufunction(f::CuYao.var"#kernel#26"{BitBasis.IterC...
99 1 @CUDA/src/compiler/execution.jl 293 cufunction##kw
16 0 @CUDA/src/compiler/execution.jl 293 (::CUDA.var"#cufunction##kw")(::NamedTuple{(:name,...
4 0 @CUDA/src/compiler/execution.jl 320 cufunction_cache
5 0 @CUDA/src/compiler/execution.jl 99 macro expansion
493 16 @CUDA/src/compiler/execution.jl 102 macro expansion
829 15 @CUDA/src/compiler/execution.jl 104 macro expansion
615 0 @CUDA/src/compiler/execution.jl 204 macro expansion
1 0 @CUDA/src/compiler/execution.jl 528 nextwarp(dev::CUDA.CuDevice, threads::Int64)
4 0 @CUDA/src/compiler/execution.jl 533 prevwarp
3 0 @CUDA/src/compiler/execution.jl 534 prevwarp
22 1 @CUDA/src/compiler/gpucompiler.jl 51 #CUDACompilerTarget#238
22 0 @CUDA/src/compiler/gpucompiler.jl 51 CUDACompilerTarget
5 5 @CUDA/src/compiler/gpucompiler.jl ? device_properties(dev::CUDA.CuDevice)
2 2 @CUDA/src/compiler/gpucompiler.jl 6 device_properties(dev::CUDA.CuDevice)
3 0 @CUDA/src/compiler/gpucompiler.jl 7 device_properties(dev::CUDA.CuDevice)
9 4 @CUDA/src/compiler/gpucompiler.jl 47 device_properties(dev::CUDA.CuDevice)
727 0 @CUDA/src/gpuarrays.jl 30 #gpu_call#281
7 7 @CUDA/src/gpuarrays.jl ? #launch_heuristic#280
217 0 @CUDA/src/gpuarrays.jl 17 #launch_heuristic#280
35 0 @CUDA/src/gpuarrays.jl 22 #launch_heuristic#280
50 0 @CUDA/src/gpuarrays.jl 24 #launch_heuristic#280
728 1 @CUDA/src/gpuarrays.jl 30 (::GPUArrays.var"#gpu_call##kw")(::NamedTuple{(:na...
2 2 @CUDA/src/gpuarrays.jl 15 (::GPUArrays.var"#launch_heuristic##kw")(::NamedTu...
309 0 @CUDA/src/gpuarrays.jl 17 (::GPUArrays.var"#launch_heuristic##kw")(::NamedTu...
1 1 @CUDA/src/mapreduce.jl ? mapreducedim!(f::typeof(identity), op::typeof(Base...
1 0 @CUDA/src/mapreduce.jl 173 mapreducedim!(f::typeof(identity), op::typeof(Base...
6 0 @CUDA/src/mapreduce.jl 174 mapreducedim!(f::typeof(identity), op::typeof(Base...
5 0 @CUDA/src/mapreduce.jl 200 mapreducedim!(f::typeof(identity), op::typeof(Base...
1 0 @CUDA/src/mapreduce.jl 213 mapreducedim!(f::typeof(identity), op::typeof(Base...
1 0 @CUDA/src/mapreduce.jl 220 mapreducedim!(f::typeof(identity), op::typeof(Base...
79 0 @CUDA/src/mapreduce.jl 234 mapreducedim!(f::typeof(identity), op::typeof(Base...
191 0 @CUDA/src/mapreduce.jl 236 mapreducedim!(f::typeof(identity), op::typeof(Base...
1 0 @CUDA/src/mapreduce.jl 249 mapreducedim!(f::typeof(identity), op::typeof(Base...
137 0 @CUDA/src/mapreduce.jl 261 mapreducedim!(f::typeof(identity), op::typeof(Base...
28 0 @CUDA/src/mapreduce.jl 264 mapreducedim!(f::typeof(identity), op::typeof(Base...
304 0 @CUDA/src/mapreduce.jl 272 mapreducedim!(f::typeof(identity), op::typeof(Base...
244 0 @CUDA/src/mapreduce.jl 275 mapreducedim!(f::typeof(identity), op::typeof(Base...
5 0 @CUDA/src/mapreduce.jl 164 big_mapreduce_threshold
8 1 @CUDA/src/mapreduce.jl 223 compute_threads
759 3 @CUDA/src/mapreduce.jl 172 mapreducedim!##kw
7 0 @CUDA/src/pool.jl 309 #_alloc#202
139 0 @CUDA/src/pool.jl 313 #_alloc#202
2 0 @CUDA/src/pool.jl 364 #_free#205
2 0 @CUDA/src/pool.jl 368 #_free#205
2 0 @CUDA/src/pool.jl 369 #_free#205
5 0 @CUDA/src/pool.jl 371 #_free#205
89 0 @CUDA/src/pool.jl 375 #_free#205
4 4 @CUDA/src/pool.jl 38 actual_alloc(bytes::Int64; async::Bool, stream::CU...
121 0 @CUDA/src/pool.jl 39 actual_alloc(bytes::Int64; async::Bool, stream::CU...
2 2 @CUDA/src/pool.jl 51 actual_alloc(bytes::Int64; async::Bool, stream::CU...
89 0 @CUDA/src/pool.jl 57 #actual_free#187
146 0 @CUDA/src/pool.jl 299 #alloc#201
1 1 @CUDA/src/pool.jl 346 #free#204
103 0 @CUDA/src/pool.jl 347 #free#204
1 1 @CUDA/src/pool.jl 308 (::CUDA.var"#_alloc##kw")(::NamedTuple{(:stream,),...
146 0 @CUDA/src/pool.jl 309 (::CUDA.var"#_alloc##kw")(::NamedTuple{(:stream,),...
100 0 @CUDA/src/pool.jl 364 (::CUDA.var"#_free##kw")(::NamedTuple{(:stream,), ...
127 0 @CUDA/src/pool.jl 38 actual_alloc##kw
89 0 @CUDA/src/pool.jl 57 actual_free##kw
146 0 @CUDA/src/pool.jl 295 alloc
104 0 @CUDA/src/pool.jl 342 (::CUDA.var"#free##kw")(::NamedTuple{(:stream,), T...
120 0 @CUDA/src/pool.jl 41 macro expansion
85 0 @CUDA/src/pool.jl 58 macro expansion
127 0 @CUDA/src/pool.jl 232 macro expansion
5 0 @CUDA/src/pool.jl 314 macro expansion
3 0 @CUDA/src/pool.jl 315 macro expansion
100 0 @CUDA/src/pool.jl 348 macro expansion
8 0 @CUDA/src/pool.jl 83 pool_mark(dev::CUDA.CuDevice)
8 0 @CUDA/src/pool.jl 78 pool_status
1 0 @CUDA/src/pool.jl 70 stream_ordered(dev::CUDA.CuDevice)
6 0 @CUDA/src/pool.jl 71 stream_ordered(dev::CUDA.CuDevice)
1354 0 @CuYao/src/CUDApatch.jl 66 as_scalar
125 0 @CuYao/src/gpuapplys.jl 107 _instruct!
16 0 @CuYao/src/gpuapplys.jl 109 _instruct!(state::CUDA.CuArray{ComplexF64, 1, CUDA...
77 1 @CuYao/src/gpuapplys.jl 112 _instruct!(state::CUDA.CuArray{ComplexF64, 1, CUDA...
1 0 @CuYao/src/gpuapplys.jl 114 _instruct!(state::CUDA.CuArray{ComplexF64, 1, CUDA...
206 4 @CuYao/src/gpuapplys.jl 121 _instruct!(state::CUDA.CuArray{ComplexF64, 1, CUDA...
12 0 @CuYao/src/gpuapplys.jl 165 _instruct!(state::CUDA.CuArray{ComplexF64, 1, CUDA...
177 0 @CuYao/src/gpuapplys.jl 175 _instruct!(state::CUDA.CuArray{ComplexF64, 1, CUDA...
13 0 @CuYao/src/gpuapplys.jl 52 instruct!(#unused#::Val{2}, state::CUDA.CuArray{Co...
21 0 @CuYao/src/gpuapplys.jl 56 instruct!(#unused#::Val{2}, state::CUDA.CuArray{Co...
14 14 @CuYao/src/gpuapplys.jl 59 instruct!(#unused#::Val{2}, state::CUDA.CuArray{Co...
295 3 @CuYao/src/gpuapplys.jl 65 instruct!(#unused#::Val{2}, state::CUDA.CuArray{Co...
14 0 @CuYao/src/gpuapplys.jl 88 instruct!(#unused#::Val{2}, state::CUDA.CuArray{Co...
197 0 @CuYao/src/gpuapplys.jl 98 instruct!(#unused#::Val{2}, state::CUDA.CuArray{Co...
1 0 @CuYao/src/gpuapplys.jl 214 instruct!
315 0 @CuYao/src/gpuapplys.jl 218 instruct!
175 0 @CuYao/src/gpuapplys.jl 226 instruct!
242 2 @GPUArrays/src/device/execution.jl 68 #gpu_call#1
625 2 @GPUArrays/src/device/execution.jl 72 #gpu_call#1
105 0 @GPUArrays/src/device/execution.jl 76 #gpu_call#1
494 1 @GPUArrays/src/device/execution.jl 48 (::GPUArrays.var"#gpu_call##kw")(::NamedTuple{(:el...
479 0 @GPUArrays/src/device/execution.jl 48 gpu_call##kw
72 1 @GPUArrays/src/host/broadcast.jl 73 _copyto!
105 0 @GPUArrays/src/host/broadcast.jl 77 _copyto!
272 0 @GPUArrays/src/host/broadcast.jl 47 copy
177 0 @GPUArrays/src/host/broadcast.jl 56 copyto!
2 2 @GPUArrays/src/host/mapreduce.jl 35 _mapreduce(f::typeof(identity), op::typeof(Base.ad...
34 0 @GPUArrays/src/host/mapreduce.jl 64 _mapreduce(f::typeof(identity), op::typeof(Base.ad...
759 0 @GPUArrays/src/host/mapreduce.jl 69 _mapreduce(f::typeof(identity), op::typeof(Base.ad...
793 0 @GPUArrays/src/host/mapreduce.jl 31 #mapreduce#20
793 0 @GPUArrays/src/host/mapreduce.jl 37 _mapreduce##kw
793 0 @GPUArrays/src/host/mapreduce.jl 31 mapreduce##kw
1 0 @GPUArrays/src/host/quirks.jl 25 _axes
1 0 @GPUArrays/src/host/quirks.jl 22 combine_axes
3 3 @GPUCompiler/src/cache.jl 66 cached_compilation(cache::Dict{UInt64, Any}, job::...
290 190 @GPUCompiler/src/cache.jl 71 cached_compilation(cache::Dict{UInt64, Any}, job::...
1 0 @GPUCompiler/src/cache.jl 72 cached_compilation(cache::Dict{UInt64, Any}, job::...
18 0 @GPUCompiler/src/cache.jl 78 cached_compilation(cache::Dict{UInt64, Any}, job::...
7 7 @GPUCompiler/src/cache.jl 79 cached_compilation(cache::Dict{UInt64, Any}, job::...
2 0 @GPUCompiler/src/cache.jl 80 cached_compilation(cache::Dict{UInt64, Any}, job::...
9 2 @GPUCompiler/src/cache.jl 101 cached_compilation(cache::Dict{UInt64, Any}, job::...
1 1 @GPUCompiler/src/interface.jl 73 hash(spec::GPUCompiler.FunctionSpec{CuYao.var"#ker...
3 0 @GPUCompiler/src/interface.jl 74 hash(spec::GPUCompiler.FunctionSpec{CuYao.var"#ker...
2 0 @GPUCompiler/src/interface.jl 75 hash(spec::GPUCompiler.FunctionSpec{CuYao.var"#ker...
2 0 @GPUCompiler/src/interface.jl 76 hash(spec::GPUCompiler.FunctionSpec{CuYao.var"#ker...
2 1 @GPUCompiler/src/interface.jl 77 hash(spec::GPUCompiler.FunctionSpec{CuYao.var"#ker...
4 0 @GPUCompiler/src/interface.jl 78 hash(spec::GPUCompiler.FunctionSpec{CuYao.var"#ker...
33 0 @GPUCompiler/src/interface.jl 160 hash(job::GPUCompiler.CompilerJob{GPUCompiler.PTXC...
13 0 @GPUCompiler/src/interface.jl 161 hash(job::GPUCompiler.CompilerJob{GPUCompiler.PTXC...
1 0 @GPUCompiler/src/interface.jl 162 hash(job::GPUCompiler.CompilerJob{GPUCompiler.PTXC...
3 0 @GPUCompiler/src/interface.jl 163 hash(job::GPUCompiler.CompilerJob{GPUCompiler.PTXC...
1 1 @GPUCompiler/src/interface.jl 164 hash(job::GPUCompiler.CompilerJob{GPUCompiler.PTXC...
3 3 @GPUCompiler/src/ptx.jl 8 PTXCompilerTarget
3 3 @GPUCompiler/src/ptx.jl 26 hash(target::GPUCompiler.PTXCompilerTarget, h::UIn...
2 0 @GPUCompiler/src/ptx.jl 27 hash(target::GPUCompiler.PTXCompilerTarget, h::UIn...
4 0 @GPUCompiler/src/ptx.jl 28 hash(target::GPUCompiler.PTXCompilerTarget, h::UIn...
2 0 @GPUCompiler/src/ptx.jl 31 hash(target::GPUCompiler.PTXCompilerTarget, h::UIn...
1 0 @GPUCompiler/src/ptx.jl 32 hash(target::GPUCompiler.PTXCompilerTarget, h::UIn...
6 0 @GPUCompiler/src/ptx.jl 34 hash(target::GPUCompiler.PTXCompilerTarget, h::UIn...
6 1 @GPUCompiler/src/ptx.jl 35 hash(target::GPUCompiler.PTXCompilerTarget, h::UIn...
6 2 @GPUCompiler/src/ptx.jl 36 hash(target::GPUCompiler.PTXCompilerTarget, h::UIn...
5 0 @GPUCompiler/src/ptx.jl 37 hash(target::GPUCompiler.PTXCompilerTarget, h::UIn...
39 0 @LuxurySparse/src/arraymath.jl 29 *
2 0 @LuxurySparse/src/arraymath.jl 34 *(A::LuxurySparse.SDPermMatrix{ComplexF64, Int64, ...
2 0 @LuxurySparse/src/arraymath.jl 35 *
176 0 @YaoArrayRegister/src/instruct.jl 30 instruct!
894 1 @YaoArrayRegister/src/instruct.jl 40 instruct!
8 1 @YaoArrayRegister/src/register.jl 192 nactive
4 0 @YaoBase/src/utils/math.jl 349 logdi(x::Int64, d::Int64)
2 0 @YaoBase/src/utils/math.jl 350 logdi(x::Int64, d::Int64)
1 0 @YaoBase/src/utils/math.jl 351 logdi(x::Int64, d::Int64)
2 2 @YaoBlocks/src/abstract_block.jl 332 _check_size(r::ArrayReg{2, ComplexF64, CUDA.CuArra...
8 0 @YaoBlocks/src/abstract_block.jl 333 _check_size(r::ArrayReg{2, ComplexF64, CUDA.CuArra...
10 0 @YaoBlocks/src/abstract_block.jl 9 apply!
1147 0 @YaoBlocks/src/abstract_block.jl 10 apply!
66 2 @YaoBlocks/src/abstract_block.jl 101 mat_matchreg
620 0 @YaoBlocks/src/abstract_block.jl 41 |>
3299 0 ...locks/src/autodiff/apply_back.jl 171 apply_back(st::Tuple{ArrayReg{2, ComplexF64, CUDA....
3299 0 ...locks/src/autodiff/apply_back.jl 170 apply_back(st::Tuple{ArrayReg{2, ComplexF64, CUDA....
2686 1 ...locks/src/autodiff/apply_back.jl 42 apply_back!(st::Tuple{ArrayReg{2, ComplexF64, CUDA...
247 0 ...locks/src/autodiff/apply_back.jl 43 apply_back!(st::Tuple{ArrayReg{2, ComplexF64, CUDA...
205 0 ...locks/src/autodiff/apply_back.jl 44 apply_back!(st::Tuple{ArrayReg{2, ComplexF64, CUDA...
44 0 ...locks/src/autodiff/apply_back.jl 79 apply_back!(st::Tuple{ArrayReg{2, ComplexF64, CUDA...
68 0 ...locks/src/autodiff/apply_back.jl 80 apply_back!(st::Tuple{ArrayReg{2, ComplexF64, CUDA...
41 0 ...locks/src/autodiff/apply_back.jl 82 apply_back!(st::Tuple{ArrayReg{2, ComplexF64, CUDA...
1 0 ...locks/src/autodiff/apply_back.jl 122 apply_back!(st::Tuple{ArrayReg{2, ComplexF64, CUDA...
3299 7 ...locks/src/autodiff/apply_back.jl 123 apply_back!(st::Tuple{ArrayReg{2, ComplexF64, CUDA...
2490 0 ...locks/src/autodiff/apply_back.jl 156 backward_params!(st::Tuple{ArrayReg{2, ComplexF64,...
193 0 ...locks/src/autodiff/apply_back.jl 160 backward_params!(st::Tuple{ArrayReg{2, ComplexF64,...
68 0 ...f/outerproduct_and_projection.jl 54 outerprod(outδ::ArrayReg{2, ComplexF64, CUDA.CuArr...
3598 0 ...ocks/src/autodiff/specializes.jl 9 (::LinearAlgebra.Adjoint{Any, typeof(expect)})(op:...
295 0 ...ocks/src/autodiff/specializes.jl 14 expect_g(op::Add{2}, circuit::Pair{ArrayReg{2, Com...
1 0 ...ocks/src/autodiff/specializes.jl 15 expect_g(op::Add{2}, circuit::Pair{ArrayReg{2, Com...
3299 0 ...ocks/src/autodiff/specializes.jl 16 expect_g(op::Add{2}, circuit::Pair{ArrayReg{2, Com...
3 0 ...ocks/src/autodiff/specializes.jl 17 expect_g(op::Add{2}, circuit::Pair{ArrayReg{2, Com...
1 0 @YaoBlocks/src/composite/chain.jl 94 _apply!(r::ArrayReg{2, ComplexF64, CUDA.CuArray{Co...
295 8 @YaoBlocks/src/composite/chain.jl 95 _apply!(r::ArrayReg{2, ComplexF64, CUDA.CuArray{Co...
93 0 @YaoBlocks/src/composite/control.jl 141 _apply!(r::ArrayReg{2, ComplexF64, CUDA.CuArray{Co...
83 0 @YaoBlocks/src/composite/control.jl 141 _apply!
191 0 ...locks/src/composite/put_block.jl 83 _apply!(r::ArrayReg{2, ComplexF64, CUDA.CuArray{Co...
447 0 ...locks/src/composite/put_block.jl 83 _apply!
321 0 ...locks/src/composite/put_block.jl 93 _apply!
1 0 @YaoBlocks/src/composite/reduce.jl 32 (::YaoBlocks.var"#117#118"{ArrayReg{2, ComplexF64,...
1 0 @YaoBlocks/src/composite/reduce.jl 32 _apply!(r::ArrayReg{2, ComplexF64, CUDA.CuArray{Co...
1 0 @YaoBlocks/src/composite/reduce.jl 33 _apply!(r::ArrayReg{2, ComplexF64, CUDA.CuArray{Co...
1 0 ...Blocks/src/composite/repeated.jl 122 _apply!(r::ArrayReg{2, ComplexF64, CUDA.CuArray{Co...
64 1 ...s/src/primitive/rotation_gate.jl 87 mat(#unused#::Type{ComplexF64}, R::RotationGate{2,...
Total snapshots: 154589