An Introduction to the Julia Programming Language

Variables and Types

In [1]:
56*8
Out[1]:
448
In [2]:
x=7
Out[2]:
7
In [3]:
x+1
Out[3]:
8
In [4]:
x^2
Out[4]:
49
In [5]:
typeof(x)
Out[5]:
Int64
In [6]:
x/3
Out[6]:
2.3333333333333335
In [7]:
cos(pi)
Out[7]:
-1.0
In [8]:
typeof(ans)
Out[8]:
Float64
In [9]:
im^2
Out[9]:
-1 + 0im
In [10]:
typeof(ans)
Out[10]:
Complex{Int64}
In [11]:
"Hello" * " world"
Out[11]:
"Hello world"
In [12]:
typeof(ans)
Out[12]:
ASCIIString
In [13]:
70//15
Out[13]:
14//3
In [14]:
true
Out[14]:
true
In [15]:
12 ≠ 42
Out[15]:
true
In [16]:
2 ∈ [6,3,5]
Out[16]:
false
In [17]:
typeof(ans)
Out[17]:
Bool
In [18]:
"Like in perl you may write x=$x"
Out[18]:
"Like in perl you may write x=7"
In [19]:
"... but also $(exp(x))"
Out[19]:
"... but also 1096.6331584284585"

Type Hierarchy

The type hierarchy for numbers looks like this: [figure: type hierarchy]

Functions + Multiple Dispatch

Functions can be defined as simply as this:

In [20]:
f(x) = x^3  # untyped fallback method: cube via whatever `^` means for the argument
Out[20]:
f (generic function with 1 method)
In [21]:
f(3)
Out[21]:
27
In [22]:
f("ha ")
Out[22]:
"ha ha ha "
In [23]:
function f(x::AbstractString)  # method picked by dispatch for any string argument
    println("\nnow in f(x::AbstractString) with x=$x of type $(typeof(x))")  # trace the call
    return "Hello $x"  # the greeting is the method's result
end

function f(x::AbstractFloat)  # method picked by dispatch for any floating-point argument
    println("\nnow in f(x::AbstractFloat) with x=$x of type $(typeof(x))")  # trace the call
    return x + 1.0  # increment by the Float64 literal 1.0
end
Out[23]:
f (generic function with 3 methods)
In [24]:
methods(f)
Out[24]:
3 methods for generic function f:
  • f(x::AbstractString) at In[23]:2
  • f(x::AbstractFloat) at In[23]:7
  • f(x) at In[20]:1
In [25]:
@show f(2.5);
now in f(x::AbstractFloat) with x=2.5 of type Float64
f(2.5) = 3.5
In [26]:
@show f( Float16(2.5) );
now in f(x::AbstractFloat) with x=2.5 of type Float16
f(Float16(2.5)) = 3.5
In [27]:
@show f("abc")
@show f("αβγ");
now in f(x::AbstractString) with x=abc of type ASCIIString
f("abc") = "Hello abc"

now in f(x::AbstractString) with x=αβγ of type UTF8String
f("αβγ") = "Hello αβγ"
In [28]:
methods(print)
Out[28]:
18 methods for generic function print:
In [29]:
methods(println)
Out[29]:
2 methods for generic function println:

Composite Types

In [30]:
# Composite type with three typed fields, built positionally as Point(x, y, name).
# Fields of a `type` can be reassigned after construction (f(p::Point) updates p.x).
type Point
    x::Int              # x component
    y::Int              # y component
    name::ASCIIString   # label attached to the point
end
In [31]:
p = Point(2,3,"Siegfried")
Out[31]:
Point(2,3,"Siegfried")
In [35]:
function f(p::Point)  # method for Point arguments; note that it modifies `p`
    println("\nnow in f(x::Point) with p=$p of type $(typeof(p))")  # shows p before the update
    p.x += p.y  # in-place update, so the caller's object changes too
    p.name  # value of the last expression is returned
end
Out[35]:
f (generic function with 4 methods)
In [36]:
methods(f)
Out[36]:
4 methods for generic function f:
  • f(x::AbstractString) at In[23]:2
  • f(x::AbstractFloat) at In[23]:7
  • f(p::Point) at In[35]:2
  • f(x) at In[20]:1
In [37]:
f(p)
now in f(x::Point) with p=Point(5,3,"Siegfried") of type Point
Out[37]:
"Siegfried"
In [38]:
p
Out[38]:
Point(8,3,"Siegfried")

Parametric Types and Methods

In [39]:
# Parametric composite type: the four numeric fields all share the element type `T`,
# which the positional constructor infers from its arguments (e.g. Line{Int64}).
# The field names suggest the endpoints (x0, y0) and (x1, y1) of a segment.
type Line{T}
    x0::T
    y0::T
    x1::T
    y1::T
    name::ASCIIString   # label; its type does not depend on T
end
In [40]:
L1=Line(23,3,4,44,"Julia")
Out[40]:
Line{Int64}(23,3,4,44,"Julia")
In [41]:
L2=Line(23.777,3.5,4.1,4.0,"Rolf")
Out[41]:
Line{Float64}(23.777,3.5,4.1,4.0,"Rolf")
In [42]:
function f{T}(x::Line{T})  # a single parametric method serves Line{T} for every T
    println("\nnow in f(x::Line{T}) with x=$x of type $(typeof(x))")  # trace the call
    return x.name  # hand back the label stored in the line
end
Out[42]:
f (generic function with 5 methods)
In [43]:
f(L1),f(L2)
now in f(x::Line{T}) with x=Line{Int64}(23,3,4,44,"Julia") of type Line{Int64}

now in f(x::Line{T}) with x=Line{Float64}(23.777,3.5,4.1,4.0,"Rolf") of type Line{Float64}
Out[43]:
("Julia","Rolf")

Arrays

In [44]:
A=[1,2,9]
Out[44]:
3-element Array{Int64,1}:
 1
 2
 9
In [45]:
A == 2
Out[45]:
false
In [46]:
A .== 2
Out[46]:
3-element BitArray{1}:
 false
  true
 false
In [47]:
A+2.1
Out[47]:
3-element Array{Float64,1}:
  3.1
  4.1
 11.1
In [48]:
B=[1 2 3; 6 7 4; 6 2 1]
Out[48]:
3x3 Array{Int64,2}:
 1  2  3
 6  7  4
 6  2  1
In [49]:
C=[ i*j for i in 1:3, j in 1:4 ]
Out[49]:
3x4 Array{Int64,2}:
 1  2  3   4
 2  4  6   8
 3  6  9  12
In [50]:
B * A
Out[50]:
3-element Array{Int64,1}:
 32
 56
 19
In [51]:
#B =rand(500,500)
@time eigvals(B)
  0.417821 seconds (288.68 k allocations: 12.865 MB)
Out[51]:
3-element Array{Float64,1}:
 10.6722 
 -3.25534
  1.58311
In [52]:
@time svd(B)
  0.195268 seconds (117.06 k allocations: 5.161 MB)
Out[52]:
(
3x3 Array{Float64,2}:
 -0.258442  -0.523252  -0.812044
 -0.83751   -0.297569   0.45829 
 -0.481441   0.798536  -0.361324,

[11.912870002845768,3.5155113987276136,1.3132813486506532],
3x3 Array{Float64,2}:
 -0.685993   0.70617   -0.175321
 -0.616336  -0.435902   0.655835
 -0.386708  -0.557955  -0.734264)

Dictionary

In [53]:
d = Dict( "A"=>6, "B"=> 20)
Out[53]:
Dict{ASCIIString,Int64} with 2 entries:
  "B" => 20
  "A" => 6
In [54]:
d["A"]
Out[54]:
6
In [55]:
for (key,value) in d
    print("$key ----> $value\n")
end
B ----> 20
A ----> 6

Regular Expressions

In [56]:
my_regex = r"[ABC]"

@show ismatch(my_regex,"Foo")
@show my_regex("Bar")
@show my_regex("boo");
ismatch(my_regex,"Foo") = false
my_regex("Bar") = true
my_regex("boo") = false
In [57]:
my_regex =r"(\d+)g"
match(my_regex,"take 500g sugar")
Out[57]:
RegexMatch("500g", 1="500")

Flow Control

In [58]:
for i in 1:5 
    println(i)
end
1
2
3
4
5
In [59]:
for i in 1:10
    if i % 3 == 0
        println(i)
    end
end
3
6
9

Packages

Lots of packages are available - see http://pkg.julialang.org for a list.

To view all locally installed packages:

In [60]:
Pkg.status()
21 required packages:
 - ArgParse                      0.3.0
 - Atom                          0.4.2+             julia-0.4
 - Bio                           0.2.0+             master
 - Bokeh                         0.2.0
 - Cairo                         0.2.33
 - DataFrames                    0.7.5              master
 - DimensionalityReduction       0.1.1
 - DistributedArrays             0.2.0
 - GR                            0.13.0
 - Gadfly                        0.4.2
 - IJulia                        1.1.10
 - Jewel                         1.0.8
 - Optim                         0.5.0
 - PlotlyJS                      0.3.1
 - Plots                         0.8.0              master
 - ProfileView                   0.1.3
 - PyPlot                        2.2.0
 - RCall                         0.5.0
 - RDatasets                     0.1.3
 - Rsvg                          0.0.2
 - UnicodePlots                  0.1.3
82 additional packages:
 - ArndtLabJulia                 0.0.0-             master (unregistered)
 - ArrayViews                    0.6.4
 - AutoHashEquals                0.0.9
 - BinDeps                       0.3.21
 - Blink                         0.3.4
 - Blosc                         0.1.5
 - BufferedStreams               0.1.5
 - Calculus                      0.1.15
 - Clustering                    0.5.0
 - CodeTools                     0.3.0+             master
 - Codecs                        0.1.5
 - ColorTypes                    0.2.3
 - Colors                        0.6.5
 - Compat                        0.8.4
 - Compose                       0.4.2
 - Conda                         0.2.2
 - Contour                       0.1.0
 - DataArrays                    0.3.6              master
 - DataStructures                0.4.4
 - Dates                         0.4.4
 - Distances                     0.3.1
 - Distributions                 0.9.0
 - Docile                        0.5.23
 - DualNumbers                   0.2.2
 - FactCheck                     0.4.3
 - FileIO                        0.1.0
 - FixedPointNumbers             0.1.3
 - FixedSizeArrays               0.2.2
 - GZip                          0.2.19
 - Graphics                      0.1.3
 - Grid                          0.4.0
 - Gtk                           0.10.1
 - GtkUtilities                  0.0.9
 - HDF5                          0.6.3
 - Hexagons                      0.0.4
 - Hiccup                        0.0.2
 - Homebrew                      0.3.1
 - HttpCommon                    0.2.6
 - HttpParser                    0.1.1
 - HttpServer                    0.1.5
 - IndexableBitVectors           0.1.1
 - IntervalTrees                 0.0.5
 - Iterators                     0.1.9
 - JLD                           0.6.3
 - JSON                          0.6.0
 - JuliaParser                   0.6.4
 - KernelDensity                 0.1.2
 - LNR                           0.0.2
 - LaTeXStrings                  0.1.6
 - Lazy                          0.10.1
 - LegacyStrings                 0.1.1
 - Libz                          0.1.1
 - LightGraphs                   0.5.5
 - LightXML                      0.2.1
 - Loess                         0.0.6
 - MacroTools                    0.3.0
 - MbedTLS                       0.2.4
 - Measures                      0.0.2
 - Media                         0.1.2
 - Mustache                      0.0.15
 - Mux                           0.2.0
 - NaNMath                       0.2.1
 - Nettle                        0.2.3
 - PDMats                        0.4.2
 - ParserCombinator              1.7.8
 - PlotUtils                     0.0.3
 - PositiveFactorizations        0.0.1
 - PyCall                        1.6.3
 - RecipesBase                   0.0.6
 - Reexport                      0.0.3
 - Requires                      0.2.2
 - SHA                           0.1.2
 - Showoff                       0.0.7
 - SortingAlgorithms             0.0.6
 - StatsBase                     0.8.3
 - StatsFuns                     0.2.2
 - SubstitutionModels            0.0.0-             master (unregistered)
 - TextWrap                      0.1.5
 - URIParser                     0.1.5
 - WebSockets                    0.1.2
 - WoodburyMatrices              0.1.5
 - ZMQ                           0.3.2

To add a package (and its dependencies):

In [ ]:
Pkg.add("DataFrames")

To update installed packages to their latest releases:

In [ ]:
Pkg.update()

To stay on top of a package's master branch:

In [ ]:
Pkg.checkout("DataFrames")
In [ ]:
#Pkg.generate("someName")

DataFrames

In [61]:
using DataFrames
d = DataFrame(x = 21:30, y = rand(10))
Out[61]:
     x   y
 1   21  0.5377985654352477
 2   22  0.8080711876006776
 3   23  0.9541549857881655
 4   24  0.33134837317344057
 5   25  0.8443261865933054
 6   26  0.4257533982368864
 7   27  0.8457130119066063
 8   28  0.3766193582981403
 9   29  0.9043218651444398
 10  30  0.5068968925566211
In [62]:
mean(d[:x]), mean(d[:y])
Out[62]:
(25.5,0.653500382473353)
In [63]:
using StatsBase
cov(d[:x], d[:y])
Out[63]:
-0.07597318459490848
In [64]:
using RDatasets
iris = dataset("datasets", "iris")
@show size(iris)
head(iris)
size(iris) = (150,5)
Out[64]:
    SepalLength  SepalWidth  PetalLength  PetalWidth  Species
 1  5.1          3.5         1.4          0.2         setosa
 2  4.9          3.0         1.4          0.2         setosa
 3  4.7          3.2         1.3          0.2         setosa
 4  4.6          3.1         1.5          0.2         setosa
 5  5.0          3.6         1.4          0.2         setosa
 6  5.4          3.9         1.7          0.4         setosa
In [65]:
by(iris, :Species, size)
Out[65]:
    Species     x1
 1  setosa      (50,5)
 2  versicolor  (50,5)
 3  virginica   (50,5)
In [66]:
using DimensionalityReduction
iris_S = convert(Array,DataArray(iris[:,1:4]))
Xpca = pca(iris_S)
WARNING: Base.FloatingPoint is deprecated, use AbstractFloat instead.
  likely near /Users/arndt/.julia/v0.4/DimensionalityReduction/src/nmf.jl:10
WARNING: Base.FloatingPoint is deprecated, use AbstractFloat instead.
  likely near /Users/arndt/.julia/v0.4/DimensionalityReduction/src/nmf.jl:10
Out[66]:
Rotation:
4x4 Array{Float64,2}:
  0.360172   -0.608141    0.731739   0.14552  
 -0.0979983  -0.783089   -0.13081   -0.0362072
  0.855279   -0.0841303  -0.308115  -0.951556 
  0.359402   -0.0992901  -0.593727   0.268428 

Scores:
150x4 Array{Float64,2}:
 -2.25714   -0.478424    0.12728     0.0240875 
 -2.07401    0.671883    0.233826    0.102663  
 -2.35634    0.340766   -0.0440539   0.0282823 
 -2.29171    0.5954     -0.0909853  -0.0657353 
 -2.38186   -0.644676   -0.0156856  -0.0358029 
 -2.0687    -1.48421    -0.0268782   0.00658612
 -2.43587   -0.0474851  -0.33435    -0.0366528 
 -2.22539   -0.222403    0.0883994  -0.0245299 
 -2.32685    1.1116     -0.144592   -0.0267695 
 -2.17703    0.467448    0.252918   -0.0397661 
 -2.15908   -1.04021     0.267784    0.0166755 
 -2.31836   -0.132634   -0.0934462  -0.133038  
 -2.21104    0.726243    0.23014     0.00241694
  ⋮                                            
  0.921737  -0.0171656  -0.415434    0.00522092
  1.84586   -0.673871    0.0126298   0.194544  
  2.00808   -0.611836   -0.426903    0.246712  
  1.89543   -0.687273   -0.129641    0.468128  
  1.15402    0.696536   -0.52839    -0.0403855 
  2.03374   -0.864624   -0.337015    0.0450363 
  1.99148   -1.04567    -0.630302    0.213331  
  1.86426   -0.385674   -0.255418    0.387957  
  1.55936    0.893693    0.0262833   0.219457  
  1.51609   -0.268171   -0.179577    0.118773  
  1.3682    -1.00788    -0.930279    0.0260414 
  0.957448   0.0242504  -0.526485   -0.162534  

Reconstruction:
150x4 Array{Float64,2}:
 -0.42537     0.578323   -1.95237  -0.832823
 -0.969562   -0.357198   -2.00012  -0.923388
 -1.08404    -0.0311948  -2.05733  -0.84696 
 -1.26364    -0.227386   -1.91956  -0.846387
 -0.482515    0.741605   -1.94402  -0.792335
  0.138808    1.36827    -1.64244  -0.578403
 -1.09845     0.32096    -1.94146  -0.682068
 -0.605156    0.381571   -1.88852  -0.836799
 -1.62378    -0.622574   -2.0136   -0.867983
 -0.889098   -0.184352   -1.94139  -0.989683
  0.0533268   0.990527   -1.85748  -0.82721 
 -0.842088    0.348101   -1.8163   -0.800286
 -1.06926    -0.382227   -2.02537  -1.00276 
  ⋮                                         
  0.0391933  -0.0227324   0.91282   0.581035
  1.11219     0.338113    1.44641   0.775038
  0.818858    0.329244    1.66572   1.10215 
  1.0739      0.352455    1.27344   0.952093
 -0.400468   -0.58796     1.12964   0.648476
  1.01826     0.520228    1.87315   1.02897 
  0.923014    0.698414    1.78245   1.25106 
  0.775554    0.138687    1.33644   0.9641  
  0.0693136  -0.864039    1.04157   0.515005
  0.59502     0.0806172   1.26155   0.710015
  0.428789    0.775924    1.51684   1.15113 
 -0.0788028  -0.038064    1.13372   0.610661

Standard Deviations:
[1.7026570894991693,0.9528572479608981,0.38180950276393044,0.14344593936077485]

Proportion of Variance:
[0.7296244541329988,0.22850761786701754,0.03668921889282881,0.005178709107154805]

Cumulative Variance:
[0.7296244541329988,0.9581320720000163,0.9948212908928451,1.0]

Graphics - PlotlyJS

In [67]:
using PlotlyJS
using Colors
using Blink
#using Rsvg

#init_notebook()

Plotly javascript loaded.

To load again call

init_notebook()

In [68]:
# Scatter plot of the first two PCA score columns, one trace per iris species.

# Distinct species labels, in order of first appearance.
nms = unique(iris[:Species])
# One colour per species; indexed below by the species' position in `nms`.
colors = [RGB(0.89, 0.1, 0.1), RGB(0.21, 0.50, 0.72), RGB(0.28, 0.68, 0.3)]

# Typed, initially empty collection of traces to hand to the plot call.
data = GenericTrace[]

for (i, nm) in enumerate(nms)
    # Rows of the PCA scores that belong to species `nm` (boolean row mask).
    sc = Xpca.scores[iris[:Species] .== nm, :]
    # Columns 1 and 2 of the scores become the plot coordinates.
    x=sc[:,1]
    y=sc[:,2]
    
    # Marker-only trace named after the species and drawn in its colour.
    trace = scatter(;name=nm, mode="markers",
                           marker_size=10, marker_color=colors[i], marker_line_width=0,
                           x=x, y=y)
    push!(data, trace)
end

# Fixed-size figure; the axes are titled after the two score columns plotted.
layout = Layout(width=800, height=550, autosize=false, title="Iris dataset",
            xaxis=attr(title="PC1"),
            yaxis=attr(title="PC2")
            )


# Keep the plot object in `p1`; later cells inspect it.
p1=PlotlyJS.plot(data, layout)
Out[68]:
In [69]:
typeof(p1)
Out[69]:
PlotlyJS.SyncPlot{PlotlyJS.JupyterDisplay}
In [ ]:
#PlotlyJS.savefig(p1, "output_filename.pdf")
In [70]:
# Build a 3-D figure of the iris data set. For every species it adds a marker cloud
# over (SepalLength, SepalWidth, PetalLength) followed by a translucent mesh over the
# same points, then returns the result of PlotlyJS.plot.
function clustering_alpha_shapes()
    @eval using DataFrames, RDatasets, Colors

    # The three scene axes share one look; each call builds a fresh attribute set.
    axis_style() = attr(gridcolor="rgb(255, 255, 255)",
                        zerolinecolor="rgb(255, 255, 255)",
                        showbackground=true,
                        backgroundcolor="rgb(230, 230,230)")

    # load data
    flowers = dataset("datasets", "iris")
    palette = [RGB(0.89, 0.1, 0.1), RGB(0.21, 0.50, 0.72), RGB(0.28, 0.68, 0.3)]

    traces = GenericTrace[]
    for (k, species) in enumerate(unique(flowers[:Species]))
        rows = flowers[flowers[:Species] .== species, :]
        xs = rows[:SepalLength]
        ys = rows[:SepalWidth]
        zs = rows[:PetalLength]

        # points first, then the mesh, so the trace order matches per species
        push!(traces, scatter3d(;name=species, mode="markers",
                                marker_size=3, marker_color=palette[k],
                                marker_line_width=0,
                                x=xs, y=ys, z=zs))
        push!(traces, mesh3d(;color=palette[k], opacity=0.3, x=xs, y=ys, z=zs))
    end

    # nested attrs become nested JSON objects in the layout
    scene = attr(xaxis=axis_style(),
                 yaxis=axis_style(),
                 zaxis=axis_style(),
                 aspectratio=attr(x=1, y=1, z=0.7),
                 aspectmode = "manual")
    layout = Layout(width=800, height=550, autosize=false, title="Iris dataset",
                    scene=scene)
    return PlotlyJS.plot(traces, layout)
end
clustering_alpha_shapes()
Out[70]:
In [71]:
using RCall
In [72]:
R"rnorm(10)"
Out[72]:
RCall.RObject{RCall.RealSxp}
 [1] -1.80862226  0.52612679 -0.30935616  0.16500632 -0.07844949 -0.32057512
 [7]  0.37502888  0.50023911  0.36234372 -0.09131232
In [73]:
L = 8
@rput L
R"v <- rnorm(L)"
@rget v
v
Out[73]:
8-element Array{Float64,1}:
  1.60154  
  0.360681 
  0.552795 
 -0.0579874
  0.0642647
 -0.468992 
  1.00036  
  1.91809  
In [74]:
rcopy(R"t.test(rnorm(1000))")
Out[74]:
Dict{Symbol,Any} with 9 entries:
  :statistic           => -1.0314510079327326
  symbol("conf.int")   => [-0.09349989910375861,0.0290728780803126]
  :alternative         => "two.sided"
  :parameter           => 999.0
  symbol("p.value")    => 0.3025790137321886
  :method              => "One Sample t-test"
  symbol("data.name")  => "rnorm(1000)"
  symbol("null.value") => 0.0
  :estimate            => -0.032213510511723005
In [75]:
ccall( (:clock, "libc"), Int32, ())
Out[75]:
42531843

Calling external programs

In [76]:
run(`echo Hello`)
Hello
In [77]:
file = "HelloWorld.jl"

run(`cat $file`)
#!/usr/bin/env julia

println("Hello World!")

BioJulia

Have a look at https://github.com/BioJulia

In [78]:
using Bio.Seq

seq = dna"ACGTTT"
Out[78]:
6nt DNA Sequence:
ACGTTT
In [79]:
reverse_complement(seq)
Out[79]:
6nt DNA Sequence:
AAACGT
In [80]:
seq = dna"ACAGCGTAGCT";
@show approxsearch(seq, dna"AGGG", 0)
@show approxsearch(seq, dna"AGGG", 1)
@show approxsearch(seq, dna"AGGG", 2);
approxsearch(seq,@dna_str("AGGG"),0) = 0:-1
approxsearch(seq,@dna_str("AGGG"),1) = 3:6
approxsearch(seq,@dna_str("AGGG"),2) = 1:4

Vectorization

In [81]:
L = 100000000
A = rand(1:1000,L)
B = rand(1:1000,L)

# Add B to A element by element, overwriting A, using an ordinary bounds-checked
# loop over the linear indices 1:length(A). Returns A.
function sum1!(A,B)
    for idx = 1:length(A)
        A[idx] += B[idx]
    end
    return A
end

# Add B to A element by element, overwriting A, with per-element bounds checks
# switched off (`@inbounds`) and the loop annotated with `@simd`. Returns A.
#
# Throws a BoundsError, before A is touched, when B has fewer elements than A.
# A longer B is accepted; only its first length(A) elements are used.
function sum2!(A,B)
    n = length(A)
    # @inbounds below removes the checks on B[i], so a short B must be rejected here;
    # otherwise the loop would read past the end of B.
    length(B) < n && throw(BoundsError(B, n))
    @simd for i in 1:n
        @inbounds A[i] = A[i]+B[i]
    end
    return A
end



@time A+=B;

@time sum1!(A,B)

@time sum2!(A,B)

;
  0.796266 seconds (8.56 k allocations: 763.363 MB, 15.37% gc time)
  0.244972 seconds (2.16 k allocations: 108.509 KB)
  0.185907 seconds (4.43 k allocations: 220.242 KB)

Parallel Computing - Multiple Processors

In [82]:
procs()
Out[82]:
1-element Array{Int64,1}:
 1
In [83]:
addprocs(2)
procs()
Out[83]:
3-element Array{Int64,1}:
 1
 2
 3
In [84]:
r = remotecall(2, rand, 2, 2)
Out[84]:
RemoteRef{Channel{Any}}(2,1,3)
In [85]:
fetch(r)
Out[85]:
2x2 Array{Float64,2}:
 0.25081   0.223139
 0.529448  0.892006
In [ ]:
#addprocs(fill("ergophobie.molgen.mpg.de",10),dir="/home/arndt",exename="/usr/bin/julia")
# Draw 2000000000 random Bools in a parallel loop and add them up with `+`
# (true counts as 1), i.e. count the heads in that many coin flips.
function nheads()
    heads = @parallel (+) for flip = 1:2000000000
        Int(rand(Bool))
    end
    return heads
end

@time nheads()

Graphics - Plots

In [86]:
using Plots
#plotlyjs()
gr()
#unicodeplots()
WARNING: using Plots.scatter in module Main conflicts with an existing identifier.
WARNING: using Plots.scatter3d in module Main conflicts with an existing identifier.
Out[86]:
Plots.GRBackend()
In [87]:
Plots.plot(rand(10,5),w=4)
[Plots.jl] Initializing backend: gr
Out[87]:
0.2 0.4 0.6 0.8 2 4 6 8 10 y1 y2 y3 y4 y5

Probabilistic Programming

https://github.com/JuliaStats/PGM.jl

Tasks

Tasks offer behavior similar to Python's `yield`:

In [ ]:
# Task body that hands the integers 1 through `max` (default 4) to its consumer,
# one `produce` call per value; nothing is produced when `max` is below 1.
function producer(max::Int = 4)
    i = 1
    while i <= max
        produce(i)
        i += 1
    end
end

for i in Task(producer)
    println(i)
end
In [ ]:
for i in Task(() -> producer(2))
    println(i)
end