Multi-threading simualtion
# Particles | # Iterations | Execution time (w/o output) |
---|---|---|
1.5M | 1.8k | 6 min (8 threads) |
This example simulates a Taylor impact test using multi-threading.
using Tesserae
using StableRNGs
using TimerOutputs
function vonmises_model(Cᵖⁿ⁻¹, ε̄ᵖⁿ, F; λ, μ, H, τ̄y⁰)
κ = λ + 2μ/3 # Bulk modulus
J = det(F) # Jacobian
p = κ * log(J) / J # Pressure
bᵉᵗʳ = symmetric(F * Cᵖⁿ⁻¹ * F') # Trial left Cauchy-Green tensor
vals, vecs = eigen(bᵉᵗʳ) # Eigenvalue decomposition
λᵉᵗʳ = sqrt.(vals) # Trial stretches
nᵗʳₐ = (vecs[:,1], vecs[:,2], vecs[:,3]) # Principal directions
τ′ᵗʳ = @. 2μ*log(λᵉᵗʳ) - 2μ/3*log(J) # Trial Kirchhoff stress
f(τ) = sqrt(3τ⋅τ/2) - (τ̄y⁰ + H*ε̄ᵖⁿ) # Yield function
dfdσ, fᵗʳ = gradient(f, τ′ᵗʳ, :all)
if fᵗʳ > 0
ν = τ′ᵗʳ / (sqrt(2/3) * norm(τ′ᵗʳ)) # Direction vector
Δγ = fᵗʳ / (3μ + H) # Incremental plastic multiplier
Δεᵖ = Δγ * dfdσ # Incremental logarithmic plastic stretch
λᵉ = @. exp(log(λᵉᵗʳ) - Δεᵖ) # Elastic stretch
τ′ = τ′ᵗʳ - 2μ*Δεᵖ # Return map
else # Elastic response
Δγ = zero(H)
λᵉ = λᵉᵗʳ
τ′ = τ′ᵗʳ
end
# Update inverse of elastic left Cauchy-Green tensor
nₐ = nᵗʳₐ
bᵉ = mapreduce((λᵉ,nₐ) -> λᵉ^2 * nₐ^⊗(2), +, λᵉ, nₐ)
# Update stress
σ′ = τ′ / J # Principal deviatoric Cauchy stress
σ = @. σ′ + p # Principal Cauchy stress
σ = mapreduce((σ,nₐ) -> σ * nₐ^⊗(2), +, σ, nₐ)
# Update state variables
F⁻¹ = inv(F)
Cᵖ⁻¹ = symmetric(F⁻¹ * bᵉ * F⁻¹') # Update plastic right Cauchy-Green tensor
ε̄ᵖ = ε̄ᵖⁿ + Δγ # Update equivalent plastic strain
σ, Cᵖ⁻¹, ε̄ᵖ
end
function main()
# Simulation parameters
T = 80e-6 # Time span
CFL = 0.8 # Courant number
# Material constants
E = 117e9 # Young's modulus
ν = 0.35 # Poisson's ratio
λ = (E*ν) / ((1+ν)*(1-2ν)) # Lame's first parameter
μ = E / 2(1 + ν) # Shear modulus
ρ⁰ = 8.93e3 # Initial density
H = 0.1e9 # Hardening parameter
τ̄y⁰ = 0.4e9 # Initial yield stress
# Geometry parameters for rod
R = 0.0032
L = 0.0324
GridProp = @NamedTuple begin
x :: Vec{3, Float64}
m :: Float64
v :: Vec{3, Float64}
vⁿ :: Vec{3, Float64}
mv :: Vec{3, Float64}
f :: Vec{3, Float64}
end
ParticleProp = @NamedTuple begin
x :: Vec{3, Float64}
m :: Float64
V :: Float64
v :: Vec{3, Float64}
∇v :: SecondOrderTensor{3, Float64, 9}
σ :: SymmetricSecondOrderTensor{3, Float64, 6}
F :: SecondOrderTensor{3, Float64, 9}
c :: Float64
ε̄ᵖ :: Float64
Cᵖ⁻¹ :: SymmetricSecondOrderTensor{3, Float64, 6}
end
# Background grid
grid = generate_grid(GridProp, CartesianMesh(R/12, (-3R,3R), (-3R,3R), (0,L+0.1L)))
# Particles
block = extract(grid.x, (-R,R), (-R,R), (0,L))
particles = generate_particles(ParticleProp, block; alg=PoissonDiskSampling(spacing=1/3, rng=StableRNG(1234)))
particles.V .= volume(block) / length(particles)
filter!(pt -> pt.x[1]^2 + pt.x[2]^2 < R^2, particles)
@. particles.m = ρ⁰ * particles.V
@. particles.F = one(particles.F)
@. particles.Cᵖ⁻¹ = one(particles.Cᵖ⁻¹)
particles.v .= Ref([0,0,-227]) # Set initial velocity
@show length(particles)
# Interpolation
mpvalues = generate_mpvalues(KernelCorrection(BSpline(Quadratic())), grid.x, length(particles))
# Color partitioning for multi-threaded G2P transfer
if Threads.nthreads() == 1
partition = nothing
else
partition = ColorPartition(grid.x)
end
# Paraview output setup
outdir = mkpath(joinpath("output", "taylor_impact"))
pvdfile = joinpath(outdir, "paraview")
closepvd(openpvd(pvdfile)) # create file
t = 0.0
step = 0
fps = 300e3
savepoints = collect(LinRange(t, T, round(Int, T*fps)+1))
reset_timer!()
Tesserae.@showprogress while t < T
@timeit "Update timestep" begin
@threaded for p in eachindex(particles)
particles.c[p] = sqrt((λ+2μ) / (particles.m[p]/particles.V[p])) + norm(particles.v[p])
end
Δt::Float64 = CFL * spacing(grid.x) / maximum(particles.c)
end
@timeit "Update interpolation" begin
update!(mpvalues, particles, grid.x) # Automatically uses multi-threading
end
if partition !== nothing
@timeit "Update color partition" begin
update!(partition, particles.x)
end
end
@timeit "P2G transfer" begin
@threaded @P2G grid=>i particles=>p mpvalues=>ip partition begin
m[i] = @∑ w[ip] * m[p]
mv[i] = @∑ w[ip] * m[p] * v[p]
f[i] = @∑ -V[p] * σ[p] * ∇w[ip]
end
end
@timeit "Grid computation" begin
@. grid.vⁿ = grid.mv / grid.m * !iszero(grid.m)
@. grid.v = grid.vⁿ + Δt * grid.f / grid.m * !iszero(grid.m)
end
@timeit "Apply boundary conditions" begin
for i in eachindex(grid)[:,:,1]
grid.vⁿ[i] = grid.vⁿ[i] .* (true,true,false)
grid.v[i] = grid.v[i] .* (true,true,false)
end
end
@timeit "G2P transfer" begin
@threaded @G2P grid=>i particles=>p mpvalues=>ip begin
v[p] += @∑ w[ip] * (v[i] - vⁿ[i])
∇v[p] = @∑ v[i] ⊗ ∇w[ip]
x[p] += @∑ w[ip] * v[i] * Δt
end
end
@timeit "Particle computation" begin
@threaded for p in eachindex(particles)
ΔFₚ = I + Δt*particles.∇v[p]
Fₚ = ΔFₚ * particles.F[p]
σₚ, Cᵖ⁻¹ₚ, ε̄ᵖₚ = vonmises_model(particles.Cᵖ⁻¹[p], particles.ε̄ᵖ[p], Fₚ; λ, μ, H, τ̄y⁰)
particles.σ[p] = σₚ
particles.F[p] = Fₚ
particles.V[p] = det(ΔFₚ) * particles.V[p]
particles.Cᵖ⁻¹[p] = Cᵖ⁻¹ₚ
particles.ε̄ᵖ[p] = ε̄ᵖₚ
end
end
t += Δt
step += 1
if t > first(savepoints)
@timeit "Write results" begin
popfirst!(savepoints)
openpvd(pvdfile; append=true) do pvd
openvtm(string(pvdfile, step)) do vtm
openvtk(vtm, particles.x) do vtk
vtk["velocity"] = particles.v
vtk["plastic strain"] = particles.ε̄ᵖ
end
openvtk(vtm, grid.x) do vtk
vtk["velocity"] = grid.v
end
pvd[t] = vtm
end
end
end
if partition !== nothing
@timeit "Reorder particles" begin
reorder_particles!(particles, partition)
end
end
end
end
print_timer()
end
Performance
The following results were obtained using 8 threads (started with julia -t8
).
julia> versioninfo()
Julia Version 1.11.5
Commit 760b2e5b739 (2025-04-14 06:53 UTC)
Build Info:
Official https://julialang.org/ release
Platform Info:
OS: macOS (arm64-apple-darwin24.0.0)
CPU: 24 × Apple M2 Ultra
WORD_SIZE: 64
LLVM: libLLVM-16.0.6 (ORCJIT, apple-m2)
Threads: 8 default, 0 interactive, 4 GC (on 16 virtual cores)
julia> main()
length(particles) = 1526888
Progress: 100%|███████████████████████████████████████| Time: 0:07:01
Elapsed: 0:07:01
Iterations: 1,831
Speed: 0.18 s/it
──────────────────────────────────────────────────────────────────────────────────────
Time Allocations
─────────────────────── ────────────────────────
Tot / % measured: 422s / 99.7% 21.1GiB / 99.9%
Section ncalls time %tot avg alloc %tot avg
──────────────────────────────────────────────────────────────────────────────────────
Particle computation 1.83k 229s 54.5% 125ms 12.4MiB 0.1% 6.91KiB
Write results 25 79.2s 18.8% 3.17s 7.08GiB 33.5% 290MiB
G2P transfer 1.83k 35.7s 8.5% 19.5ms 12.8MiB 0.1% 7.14KiB
P2G transfer 1.83k 33.2s 7.9% 18.1ms 1.94GiB 9.2% 1.09MiB
Update interpolation 1.83k 26.9s 6.4% 14.7ms 16.4MiB 0.1% 9.20KiB
Grid computation 1.83k 9.53s 2.3% 5.21ms 0.00B 0.0% 0.00B
Update timestep 1.83k 3.24s 0.8% 1.77ms 10.9MiB 0.1% 6.11KiB
Reorder particles 25 1.81s 0.4% 72.5ms 11.8GiB 55.7% 482MiB
Update color partition 1.83k 1.79s 0.4% 976μs 59.1MiB 0.3% 33.0KiB
Apply boundary conditions 1.83k 126ms 0.0% 69.1μs 229MiB 1.1% 128KiB
──────────────────────────────────────────────────────────────────────────────────────
Scalability
This page was generated using Literate.jl.