A Monad for concurrency

This code is an attempt to port the Haskell code in A poor man’s concurrency monad to OCaml.

The main idea here, is about using a monadic interface to compose (CPS’d) functions. The resulting program can be run concurrently by interleaving and evaluating, on-demand, its constituents.

module Conc = 
struct
  type action = Atom of zaction
                | Fork of (zaction * zaction)
                | Stop
  and zaction  = action lazy_t
 
  (* CPS monad *)
  (* type 'a t = ('a -> action) -> action *)
  let bind f k = fun c -> f (fun a -> k a c)
  let return x = fun c -> c x  
 
  let atom f = fun c -> Atom (lazy (let b = f () in c b))
  let action f = f (fun () -> Stop)
  let fork f = fun c -> Fork (lazy (action f), lazy (c ()))
  let par m1 m2 = fun c -> Fork (lazy (m1 c), lazy (m2 c)) 
  let stop = fun c -> Stop
  let rec round = function
    | [] -> ()
    | (x::xs) -> match x with
        | Atom th -> let y = Lazy.force th in round (xs @ [y])
        | Fork (a1, a2) -> round (xs @ [Lazy.force a1; Lazy.force a2])
        | Stop -> round xs
 
  let run m = round [action m]
end
 
open Conc
 
(* given f and a number n, returns f composed with itself
   n times *)
let rec loop f = function
  | 0 -> stop
  | n -> bind f (fun () -> loop f (n-1))
 
(* example 1, prints "start", then prints the words "cat" 
   and "fish", interleaved, 10 times*)
let write1 s = atom (fun _ -> print_string s)
let example1 : ((unit -> action) -> action) = 
  bind (write1 "start>>>") 
    (fun _ -> bind (fork (loop (write1 "cat") 10)) 
       (fun _ -> (loop (write1 "fish") 10)))
 
 
let explode s =
  let rec exp i l =
    if i < 0 then l else exp (i - 1) (s.[i] :: l) in
  exp (String.length s - 1) []
 
(* write2: string -> ('a -> action) -> action
   prints each character one after the other *)
let write2 s = 
  let rec write2_aux = function
    | []    -> atom (fun _ -> ())
    | c::cs -> bind (atom (fun _ -> print_char c)) 
                    (fun _ -> write2_aux cs)
  in write2_aux (explode s)
 
(* write2: given a string, returns a CPSed function
   which prints each character one after the other *)
let example2 : ((unit -> action) -> action) = 
  bind (write2 "start>>>") 
    (fun _ -> bind (fork (loop (write2 "cat") 10)) 
       (fun _ -> (loop (write2 "fish") 10)))
 
let _ = 
  run example1; 
  print_newline ();
  run example2

Dataflow variables in Erlang

Implementing dataflow variables in Erlang, as library, is oddly easy.

Here is the whole code:

-module(prog).
 
-export([set/1, get/1, main/0]).
 
-define(DF_SET(Exp), prog:set(fun() -> Exp end)).
-define(DF_GET(Var), prog:get(Var)).
 
%% Creates a new dataflow var
set(Fun) ->
    spawn(fun() -> Res = Fun(), resolved(Res) end).
 
%% Loop to keep the result accessible
resolved(Res) ->
    receive
        {get, Ref, Pid} -> Pid ! {value, Ref, Res}
    end,
    resolved(Res).
 
%% Blocks until the future has evaluated
get(Var) ->
    Ref = make_ref(),
    Var ! {get, Ref, self()},
    receive
        {value, Ref, Res} -> Res
    end.
 
%% Time consuming friend
fib(0) -> 0;
fib(1) -> 1;
fib(N) when N > 1 -> fib(N-1) + fib(N-2).
 
main() ->
    X = ?DF_SET(fib(30)),
    Y = ?DF_SET(fib(20)),
    io:format("Computation starts, we do something else in the meantime...~n"),
    io:format("Accessing the result will block until it's resolved~n"),
    io:format("Result is: ~w ~n", [?DF_GET(X) + ?DF_GET(Y)]),
    io:format("Subsequent accesses will be instantaneous~n").

How does it work ?

We can implement a dataflow variable with an Erlang process, because:

  1. Erlang processes are really cheap, so we can afford to spawn one for each variable.
  2. a process can block to wait for a specific message. We use this to block the current process when we ‘look up’ a variable whose value is not yet available.

Breaking it down

To create a dataflow variable, you provide a computation:

X = prog:set(fun() -> fib(30) end)

This spawns a process p which starts executing fib(30) right away (see Res = Fun() in set(Fun)).

When the current process try to access X’s value:

Result = prog:get(X)

it sends a lookup message to p:

 X ! {get, Ref, self()}

and blocks (because of receive ... end) until p responds.

If p is not done computing the result of fib(30), the message sits in its mailbox.

When p is done,  it enters an infinite loop (the resolved function), and responds to pending / incoming lookups.

Syntax

We can improve syntax slightly with some help from the following macros:

-define(DF_SET(Exp), prog:set(fun() -> Exp end)).
-define(DF_GET(Var), prog:get(Var)).

we can now write

X = ?DF_SET(Exp)

instead of

X = prog:set(fun() -> Exp end)

and

?DF_GET(X)

instead of

prog:get(X)

Straight-line program interpreter

Chapter 1 of the Tiger book

type id = string
 
type binop = Plus | Minus | Times | Div
 
type stm = CompoundStm of stm * stm
	 | AssignStm of id * exp
	 | PrintStm of exp list
 
and exp = IdExp of id
	| NumExp of int
        | OpExp of exp * binop * exp
        | EseqExp of stm * exp
 
(* 
prog:
a = 5 + 3; 
b = (print [a, a-1]; 10 * a);
print b;
*)
let prog = 
 CompoundStm(
   AssignStm("a", 
             OpExp(NumExp 5, Plus, NumExp 3)),
   CompoundStm(
     AssignStm("b",
       EseqExp(PrintStm[IdExp "a"; OpExp(IdExp "a", Minus, NumExp 1)],
               OpExp(NumExp 10, Times, IdExp "a"))),
     PrintStm[IdExp "b"]))
 
 
(* 1: maxargs *)
let rec maxargs stm = 
  match stm with
    | CompoundStm (s1, s2) -> max (maxargs s1) (maxargs s2)
    | AssignStm (_, e) -> maxargs_exp e
    | PrintStm l -> let max_aux i e = max i (maxargs_exp e)
                    in let maxsub = List.fold_left max_aux  0 l
                    in max (List.length l) maxsub
and maxargs_exp exp =
  match exp with
    | IdExp _ -> 0
    | NumExp _ -> 0
    | OpExp (e1, _, e2) -> max (maxargs_exp e1) (maxargs_exp e2)
    | EseqExp (s, e) -> max (maxargs s) (maxargs_exp e)
 
 
(* 2: interpreter *)
type table = (id * int) list 
 
let rec lookup id env =
  match env with
    | [] -> raise Not_found
    | (x,v)::_ when x = id -> v
    | _::p -> lookup id p 
 
let set x v env = (x, v) :: env
 
let apply op v1 v2 = 
  match op with
    | Plus -> v1 + v2
    | Minus -> v1 - v2
    | Times -> v1 * v2
    | Div -> v1 / v2
 
let rec interp_stm expr env =
  match expr with 
    | CompoundStm (s1, s2) -> let env' = interp_stm s1 env
                              in interp_stm s2 env'
    | AssignStm (id, expr') -> let res, env' = interp_exp expr' env
                               in set id res env
    | PrintStm exprs -> 
      let print_exp env exp = 
        let v, env' = interp_exp exp env
        in print_string (string_of_int v ^ " "); env'   
      in let env' = List.fold_left print_exp env exprs
         in print_string "\n"; env'
 
and interp_exp expr env = 
  match expr with
    | IdExp id -> lookup id env, env 
    | NumExp n -> n, env
    | OpExp (e1, op , e2) -> let v1, env1 = interp_exp e1 env
                             in let v2, env2 = interp_exp e2 env1
                                in apply op v1 v2, env2
    | EseqExp (s, e) -> let env' = interp_stm s env 
                        in interp_exp e env'
 
let _ = 
  print_endline (string_of_int (maxargs prog)); (* => 2 *)
  interp_stm prog []                            (* => 8 7\n 80 *)

Set ADT in OCaml

In chapter 2 of Okasaki’s book, ML’s module system is used to implement an abstract polymorphic Set datatype. We parameterize our set implementation over a “comparable” interface that the set elements must implement.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
(* "comparable" interface *)
module type Ord =
sig
  type t
  val lt : t -> t -> bool
  val lte : t -> t -> bool
  val eq : t -> t -> bool
end
 
(* implementation of Ord for integers *)
module IntOrd : Ord with type t = int =
struct
  type t = int
  let lt = (<)
  let lte = (<=)
  let eq = (=)
end
 
(* Set interface *)
module type Set =
sig
  (* abstract element and set type *)         
  type elem 
  type set  
  (* set operations *)
  val empty : set
  val insert : elem -> set -> set
  val member : elem -> set -> bool
end
 
(* implementation of a set paramterized over Ord *)
module UnbalSet(Elt : Ord) : Set with type elem = Elt.t =
struct
  type elem = Elt.t
  type 'a tree = 
      Leaf 
    | Node of 'a tree * 'a * 'a tree
  type set = elem tree 
 
  let empty = Leaf
  let rec insert x = function
      Leaf -> Node (Leaf, x, Leaf)
    | Node (a, y, b) as s -> 
        if Elt.lt x y then Node (insert x a, y, b)
        else if Elt.lt y x then Node (a, y, insert x b) 
        else s
 
  let rec member x = function
      Leaf -> false
    | Node (a, y, b) ->
        if Elt.lt x y then member x a               (* two comparisons *)
        else if Elt.lt y x then member x b
        else true
end
 
(* instanciation of a set of comparable integers *)
module IntSet = UnbalSet(IntOrd)

Exercise 2.1 asks to optimize the member operation, by assuming that equality is uncommon we can mostly get away with only one comparison, comparing for equality only when we reached the end of a branch (worst case is now d+1 instead of 2*d comparisons, where d is the depth of the tree):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
module type Ord =
sig
  type t
  val lt : t -> t -> bool
  val lte : t -> t -> bool
  val eq : t -> t -> bool
end
 
module IntOrd : Ord with type t = int =
struct
  type t = int
  let lt = (<)
  let lte = (<=)
  let eq = (=)
end
 
module type Set =
sig
  type elem
  type set
  val empty : set
  val insert : elem -> set -> set
  val member : elem -> set -> bool
end
 
module UnbalSet(Elt : Ord) : Set with type elem = Elt.t =
struct
  type elem = Elt.t
  type 'a tree = 
      Leaf 
    | Node of 'a tree * 'a * 'a tree
  type set = elem tree 
 
  let empty = Leaf
  let rec insert x = function
      Leaf -> Node (Leaf, x, Leaf)
    | Node (a, y, b) as s -> 
        if Elt.lt x y then Node (insert x a, y, b)
        else if Elt.lt y x then Node (a, y, insert x b) 
        else s
 
  let member x s = 
    let rec member_aux x s z =
      match s with
          Leaf -> x = z  (* comparing for equality when reaching a leaf *)
        | Node(a, y, b) -> 
            if Elt.lt x y then member_aux x a z   (* only one comparison *)
            else member_aux x b y
    in
      match s with
          Leaf -> false
        | Node(_, y, _) -> member_aux x s y             
end
 
module IntSet = UnbalSet(IntOrd)

Loops and state with pure functions

How would you write a loop in Python without ‘for/while’ ?

this:

i = 0
while i < 10:
   print i
   i = i +1

can be replaced by a recursive function like this:

def f(i):
   if i < 10: 
       print i
       f(i+1)
 
f(0)

In fact we can do to other variables what we’ve just done with the loop counter i, use a function parameter to pass new values

j = []
i = 0
while i < 10:
   j.append(i)
   i = i +1
 
print j

becomes

def f(i, j):
   if i < 10:  
       return f(i+1, j + [i])
   else:
       return j
 
print f(0, [])

So functions let us loop and change state.
While these transformations are pretty simple, they can be tricky to apply on a bigger piece of code…

Here is the Python code I wanted to rewrite:

def tokenize(s):
    "Convert a string into a list of tokens."
    return s.replace('(',' ( ').replace(')',' ) ').split()
 
def read_from(tokens):
    "Read an expression from a sequence of tokens."
    if len(tokens) == 0:
        raise SyntaxError('unexpected EOF while reading')
    token = tokens.pop(0)
    if '(' == token:
        L = []
        while tokens[0] != ')':
            L.append(read_from(tokens))
        tokens.pop(0) # pop off ')'
        return L
    elif ')' == token:
        raise SyntaxError('unexpected )')
    else:
        return token
 
print read_from(tokenize("(*(+ 3 4)(+ 6 7))"))

And here is the version without loop construct and mutation…

def tokenize(s):
    "Convert a string into a list of tokens."
    return s.replace('(',' ( ').replace(')',' ) ').split()
 
def parse_fun(tokens):
    _, ast = parse_fun_aux(tokens)
    return ast
 
def parse_fun_aux(tokens):
    if len(tokens) == []:
        raise SyntaxError('unexpected EOF')
    if tokens[0] == '(' :
        return loop(tokens[1:],[])
    elif tokens[0] == ')' :
        raise SyntaxError('unexpected )')
    else:
        return (tokens[1:], tokens[0])
 
def loop(tokens, ast):
    if tokens[0] == ')':
        return (tokens[1:], ast)
    else:
        tokens_left, token = parse_fun_aux(tokens)
        return loop(tokens_left, ast + [token])
 
print parse_fun(tokenize("(*(+ 3 4)(+ 6 7))"))

Workers unite!

Web Workers are part of HTML 5 and already featured in Firefox 3.5 and Safari 4. A worker is a script that can be dynamically loaded by the main page (or other workers). it runs in isolation in a background thread and only communicate with its caller through string messages.

They can be used to to increase user interface responsiveness by taking care of computations that would otherwise run in the main UI thread.

The UI freeze

Let’s see what happens when we run a long computation in the main thread. Try clicking on all checkboxes during the computation and notice how all clicks are queued and processed once the computation finishes (read about asynchronous events and timers).

Workers to the rescue!

Let’s run the same computation but this time inside a worker. Again, try to click the checkboxes (you need Firefox 3.5 or Safari 4), see how the UI responds immediately?

The setTimeout pattern

Traditionally, to prevent blocking the browser, one would break long computations into smaller ones and use setTimeout to schedule execution as Julien Lecompte explains.

He applies this method to sort an array. Every pass through the array is scheduled to run with setTimeout:

function sort (progressFn) {
  var i = 0;
  (function () {
    ... // sorting code here
    i++;
    progressFn(i, length);
    // schedule the current function for execution
    if (i < length) {
      setTimeout(arguments.callee, 0);
    }
  })();
}

Try it : sort without worker

Sort with a Worker

Now let’s put his sorting code into a worker, it should run a lot faster (no callback overhead) and has the advantage of not mixing scheduling with sorting code:

function sort (progressFn) {
  i = 0;
  var sort_loop = function () {
    ... // sorting code here
    i++;
    progressFn(i, length);
    if (i < length) {
      sort_loop();
    }
  };
  sort_loop();
}

Try it: sort with worker (you need Firefox 3.5).

There you go, UI responsiveness without the pain of chopping your code into digestive chunks !

While Workers are not yet implemented in all browsers (the spec is still evolving), be prepared! By allowing us to separate UI from application logic and giving us multiple threads of execution, Workers will help us build bigger and better web applications… and maybe overthrow that funky boss !

Related links

How Javascript Timers Work

Running CPU Intensive JavaScript Computations in a Web Browser

Computing with Javascript Web Workers

Mozilla Dev Center: Using Web Workers

jQuery’s bind() and live()

To carry on with Javascript, I’m going to talk about a little circular problem I ran into today while dynamically inserting elements into a page.

Let’s say you have a page with a list and you want to do something when someone clicks on the list items:

<ul id="taskList">
   <li>first item</li>
   <li>second item</li>
   ...
</ul>

Typically, with jQuery you would attach handlers to the list items like so:

$(document).ready(function(){
    $('#taskList li').bind('click', function(){
        doSomething();
   });
});

So far so good, but suppose you want to insert a list under the element you click on? You will also need to attach handlers for the items you’ve inserted! Let’s see:

$(document).ready(function(){
    $('#taskList li').bind('click', function(){
        insertSublistUnder(this);
        // now let's attach the new handlers
        $(this).find('li').bind('click', function(){
             insertSublistUnder(this);
             // now let's attach the new handlers...
             // hey, wait a minute !!!!
       });
   });
});

You see the pattern here, it could go on and on !!!
So how do you do this?

Short answer: if your version of jQuery is 1.3+, use live instead of bind to attach the handlers (this ensures that all <li> inserted in the future will have their handlers bound automatically:

$(document).ready(function(){
    $('#taskList li').live('click', function(){
        insertSublistUnder(this);
   });
});

But what do you do if you’re stuck in pre-1.3 land ?
Let’s see, we are attaching handlers which in turn insert some items and… attach some handlers which…
We have a bit of recursion here:

$(document).ready(function(){  <------------------\
    $('#taskList li').bind('click', function(){   |
        insertSublistUnder(this);                 |
        // here call this ------------------------/
        // on the newly inserted items
   });
});

Let’s pack the recursive snippet into a function, say, ‘attachHandlersUnder’, which takes a (clicked) list element as a parameter:

function attachHandlersUnder(li){
   $(li).find('li').bind('click', function(){
        insertSublistUnder(this);
        attachHandlersUnder(this); // recursive definition
   });
} 
 
$(document).ready(function(){
    attachHandlersUnder('#taskList'); //
});

Et voila !
Well, in essence… It is actually a bit more complicated, since calling ‘bind’ repeatedly on an element keeps adding handlers to the ones previously set.

Here’s a file with the final code.

1 + 1 = 11

This post talks about Javascript and the unfortunate combination of two quirks which can cause a few surprises.

Quirk 1: + as addition and concatenation

Javascript, unlike some languages use the same symbol for number addition and string concatenation:

1 + 1    // => 2
'1' + '1' // => '11'

Quirk 2: implicit type conversion

Javascript does not always throw an error when evaluating expressions with an odd mix of values and operators, such as, ’10′ / ’5′, instead it tries to converts values behind the scene (yes, you should be scared). Usually strings involved in arithmetic operations are converted to number, so :

'10' / 5 // => 10 / 5 => 2

So it seems safe to assume that string representation of numbers will be treated as numbers in arithmetic operations, right ? Not so if the + operator is involved! Remember:

'1' + '1'  // => '11'

Form and formula

Suppose we have a form that asks for two numbers, and some Javascript which calculates the average. Let’s say the user types ’1′ and ’1′:

// Form values are strings
var number1 = $('#number1').val(); // '1'
var number2 = $('#number2').val(); // '1'
var average = (number1 + number2) / 2 ; // 5.5 !!

we get 5.5 instead of 1 because (number1 + number2) is concatenation and not addition.

The workaround in that case is to convert the form values to numbers beforehand :

var number1 = Number($('#number1').val()); // 1
var number2 = Number($('#number2').val()); // 1
var average = (number1 + number2) / 2 ; // 1

What makes this especially bad, is that the program appears to be working; it just silently returns the wrong result ! So remember, be cautious when using strings and addition in Javascript…