{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "using BenchmarkTools\n",
    "\n",
    "versioninfo(verbose=true)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## HWloc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "using Pkg\n",
    "Pkg.add(\"Hwloc\")\n",
    "\n",
    "using Hwloc\n",
    "Hwloc.num_physical_cores()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Starting Threads"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "using Base.Threads\n",
    "nthreads()\n",
    "\n",
    "threadid()\n",
    "\n",
    "# One slot per thread, so indexing by threadid() stays in bounds\n",
    "# regardless of how many threads Julia was started with.\n",
    "a = zeros(nthreads())\n",
    "for i in 1:nthreads()\n",
    "    a[threadid()] = threadid()\n",
    "end\n",
    "a"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## The @threads macro"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@threads for i in 1:nthreads()\n",
    "    a[threadid()] = threadid()\n",
    "end\n",
    "a"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Prefix sum"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "    prefix_threads!(y)\n",
    "\n",
    "Overwrite `y` with its inclusive prefix sum (running total) and return `y`.\n",
    "Each pass over a stride is a `@threads` loop. An empty `y` is returned unchanged.\n",
    "\"\"\"\n",
    "function prefix_threads!(y)\n",
    "    l = length(y)\n",
    "    # log2(0) is -Inf, which cannot be converted to Int, so bail out early.\n",
    "    l == 0 && return y\n",
    "    k = ceil(Int, log2(l))\n",
    "    # Up-sweep: accumulate partial sums at every multiple of 2^j.\n",
    "    for j = 1:k\n",
    "        @threads for i = 2^j:2^j:min(l, 2^k)\n",
    "            @inbounds y[i] = y[i-2^(j-1)] + y[i]\n",
    "        end\n",
    "    end\n",
    "    # Down-sweep: fill in the positions between those multiples.\n",
    "    for j = (k-1):-1:1\n",
    "        @threads for i = 3*2^(j-1):2^j:min(l, 2^k)\n",
    "            @inbounds y[i] = y[i-2^(j-1)] + y[i]\n",
    "        end\n",
    "    end\n",
    "    return y\n",
    "end"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Thread Safety"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Monte Carlo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "using Random\n",
    "\n",
    "\"\"\"\n",
    "    darts_in_circle(n, rng=Random.GLOBAL_RNG)\n",
    "\n",
    "Draw `n` pairs of uniform random numbers from `rng` and return how many of\n",
    "them satisfy `x^2 + y^2 < 1`.\n",
    "\"\"\"\n",
    "function darts_in_circle(n, rng=Random.GLOBAL_RNG)\n",
    "    inside = 0\n",
    "    for _ in 1:n\n",
    "        if rand(rng)^2 + rand(rng)^2 < 1\n",
    "            inside += 1\n",
    "        end\n",
    "    end\n",
    "    return inside\n",
    "end\n",
    "\n",
    "\"\"\"\n",
    "    pi_serial(n)\n",
    "\n",
    "Estimate pi as `4 * darts_in_circle(n) / n` using the global RNG.\n",
    "\"\"\"\n",
    "function pi_serial(n)\n",
    "    return 4 * darts_in_circle(n) / n\n",
    "end\n",
    "\n",
    "using Base.Threads\n",
    "\n",
    "# One random number generator per thread, indexed by threadid().\n",
    "const rnglist = [MersenneTwister() for i in 1:nthreads()];\n",
    "\n",
    "\"\"\"\n",
    "    pi_threads(n, loops)\n",
    "\n",
    "Estimate pi from `loops` batches of `n` darts each, with the batches\n",
    "distributed over threads by `@threads`. Every batch uses the RNG that\n",
    "belongs to the thread running it.\n",
    "\"\"\"\n",
    "function pi_threads(n, loops)\n",
    "    inside = zeros(Int, loops)\n",
    "    @threads for i in 1:loops\n",
    "        rng = rnglist[threadid()]\n",
    "        # Index by the iteration, not the thread: `inside` has `loops` slots,\n",
    "        # and a thread may run several iterations (or have an id > loops).\n",
    "        inside[i] = darts_in_circle(n, rng)\n",
    "    end\n",
    "    return 4 * sum(inside) / (n*loops)\n",
    "end"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pi_threads(2_500_000, 4)\n",
    "\n",
    "@btime pi_serial(10_000_000)\n",
    "\n",
    "@btime pi_threads(2_500_000, 4)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Atomics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "    sum_thread_base(x)\n",
    "\n",
    "Sum `x` with a plain `r += x[i]` inside a `@threads` loop.\n",
    "\n",
    "NOTE: the update of `r` is not synchronised between threads, so this is a\n",
    "data race. It is presumably kept as the thread-unsafe baseline for this\n",
    "section; do not rely on its result when more than one thread is running.\n",
    "\"\"\"\n",
    "function sum_thread_base(x)\n",
    "    r = zero(eltype(x))\n",
    "    @threads for i in eachindex(x)\n",
    "        @inbounds r += x[i]\n",
    "    end\n",
    "    return r\n",
    "end\n",
    "\n",
    "a = rand(10_000_000);\n",
    "\n",
    "@btime sum($a)\n",
    "\n",
    "@btime sum_thread_base($a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "    sum_thread_atomic(x)\n",
    "\n",
    "Sum `x` inside a `@threads` loop, adding every element to a shared\n",
    "`Atomic` accumulator with `atomic_add!`.\n",
    "\"\"\"\n",
    "function sum_thread_atomic(x)\n",
    "    r = Atomic{eltype(x)}(zero(eltype(x)))\n",
    "    @threads for i in eachindex(x)\n",
    "        @inbounds atomic_add!(r, x[i])\n",
    "    end\n",
    "    return r[]\n",
    "end\n",
    "\n",
    "@btime sum_thread_atomic($a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "    sum_thread_split(A)\n",
    "\n",
    "Sum `A` by giving each thread one contiguous chunk of\n",
    "`div(length(A), nthreads())` elements. Each thread sums its chunk into a\n",
    "local variable and publishes it with a single `atomic_add!`. The elements\n",
    "left over after the even split are added serially at the end.\n",
    "\"\"\"\n",
    "function sum_thread_split(A)\n",
    "    total = Atomic{eltype(A)}(zero(eltype(A)))\n",
    "    len, leftover = divrem(length(A), nthreads())\n",
    "    # Split the array equally among the threads.\n",
    "    @threads for t in 1:nthreads()\n",
    "        # Thread-local partial sum: the shared atomic must not be reset or\n",
    "        # accumulated into directly, otherwise threads clobber each other.\n",
    "        partial = zero(eltype(A))\n",
    "        @simd for i in (1:len) .+ (t-1)*len\n",
    "            @inbounds partial += A[i]\n",
    "        end\n",
    "        atomic_add!(total, partial)\n",
    "    end\n",
    "    result = total[]\n",
    "    # Process the remaining data that did not fit into the even split.\n",
    "    @simd for i in length(A)-leftover+1:length(A)\n",
    "        @inbounds result += A[i]\n",
    "    end\n",
    "    return result\n",
    "end\n",
    "\n",
    "@btime sum_thread_split($a)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Synchronisation primitives"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "const f = open(tempname(), \"a+\")\n",
    "\n",
    "const mt = Base.Threads.Mutex();\n",
    "\n",
    "@threads for i in 1:50\n",
    "    r = pi_serial(10_000_000)\n",
    "    lock(mt)\n",
    "    # Release the mutex even if the write throws, so other threads\n",
    "    # are not left waiting on it.\n",
    "    try\n",
    "        write(f, \"From $(threadid()), pi = $r\\n\")\n",
    "    finally\n",
    "        unlock(mt)\n",
    "    end\n",
    "end\n",
    "\n",
    "\n",
    "close(f)\n",
    "\n",
    "\n",
    "const s = Base.Semaphore(2);\n",
    "@threads for i in 1:nthreads()\n",
    "    Base.acquire(s)\n",
    "    # Hand the permit back even if the computation or the print fails.\n",
    "    try\n",
    "        r = pi_serial(10_000_000)\n",
    "        Core.println(\"Calculated pi = $r in Thread $(threadid())\")\n",
    "    finally\n",
    "        Base.release(s)\n",
    "    end\n",
    "end"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Threaded libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = rand(1000, 1000);\n",
    "\n",
    "b = rand(1000, 1000);\n",
    "\n",
    "@btime $a*$b;"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Oversubscriptions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "    matmul_serial(x)\n",
    "\n",
    "For every matrix `m` in `x`, compute `m' * m` and keep its first element.\n",
    "Returns the collected values as a vector. Runs in a plain serial loop.\n",
    "\"\"\"\n",
    "function matmul_serial(x)\n",
    "    first_num = zeros(length(x))\n",
    "    for i in eachindex(x)\n",
    "        @inbounds first_num[i] = (x[i]'*x[i])[1]\n",
    "    end\n",
    "    return first_num\n",
    "end\n",
    "\n",
    "\n",
    "\"\"\"\n",
    "    matmul_thread(x)\n",
    "\n",
    "Same computation as `matmul_serial`, with the loop over `x` run under\n",
    "`@threads`.\n",
    "\"\"\"\n",
    "function matmul_thread(x)\n",
    "    first_num = zeros(length(x))\n",
    "    @threads for i in eachindex(x)\n",
    "        @inbounds first_num[i] = (x[i]'*x[i])[1]\n",
    "    end\n",
    "    return first_num\n",
    "end\n",
    "\n",
    "m = [rand(1000, 1000) for _ in 1:10];\n",
    "\n",
    "# `m` is a non-constant global, so interpolate it with `\\$` to keep the\n",
    "# global-variable lookup out of the measurement.\n",
    "@btime matmul_serial($m);\n",
    "\n",
    "@btime matmul_thread($m);\n",
    "\n",
    "using LinearAlgebra\n",
    "BLAS.set_num_threads(1)\n",
    "\n",
    "@btime matmul_thread($m);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "*This notebook was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).*"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Julia 1.1.0",
   "language": "julia",
   "name": "julia-1.1"
  },
  "language_info": {
   "file_extension": ".jl",
   "mimetype": "application/julia",
   "name": "julia",
   "version": "1.1.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 3
}