Friday, January 30, 2015

Unit Test in Node.js application using Sinon.js test spy and stub technique

In the last few months I have written couple blogs about data structure and algorithm. It was fun! Data structure and algorithm are always fun! And I have not written any technology blog. Now today I am going to talk about how I wrote unit tests for a Node.js application I am working on during my weekends and nights.

My node.js is using express framework along with Sails' waterline ORM.  All of the model classes are exposed under global var "Model".

Here is my controller code. 

module.exports = {
    model: 'group', // If no model is specified, CRUD actions won't be inherited
    actions: {
    /**
    * create group by owner
    * 
    * @param {string}   name            group name
    * @param {string}   type            group type
    * @param {string}   project         id of the project the group belongs to optional
    * @param {string}   owner           group owner id
    */
        'post /create': function (req, res, next) {
        var userId = req.param('owner');
        var group = {
        name: req.param('name'),
        type: req.param('type'),
        project: req.param('project'),
        owner: req.param('owner'),
        users: [{id: userId}]
        };
        
        Model.group.create(group, function(err, group){
        if(err)
        return res.json({status: 'error', data: err});
        else{
        //code update user.groups with newly created group
        //should move to service class
        Model.user.findOne(userId, function(err, user){
        if(err)
    console.log("can't find user with id: " + userId);
        else{
        user.groups.push({id: group.id});
        user.save(function(err, re){
        if(err)
        console.log("update user.groups fails: " + user.id + " - " + group.id);
        });
        }
        });
        return res.json({status: 'success', data: group});
        }
        });
        
        },
}


So how I test the controller code without test Model classes? It is unit test! ;)

As Java developer I used to use JMock or EasyMock to mock out the dependencies, or write little stub classes to stub out the dependencies. In node.js world there is something similar: http://sinonjs.org/docs/#sinonspy. Sinonjs is behavior driven testing framework. And with the help of the Mocha, the unit test framework for Node.js. This blog assumes you read the introduction of Mocha and Sinonjs. 

So what I do is to stub out the Model classes and use Sinon spy to check the arguments passed back to respond obj. 

Here is my little stub here can control the behavior of waterline models.

Model.group = {
     create: function(group, callback) {callback(1, [])}
};


And another sub:

req.param = function(v) {return v;};

Now here comes the spy:

spy = res.json = sinon.spy(); 



The following is the complete test code: 

var sinon = require('sinon');
var chai = require('chai');
var assert = require('assert');
var expect = chai.expect;

var group = require('../../../api/controllers/group.js');
Model = {};

describe("Group", function() {
   describe("create group", function() {

       it("should generate error", function() {
           //my little stub here can control the behavior of waterline models.
       Model.group = {
    create: function(group, callback) {callback(1, [])}
       };
       
           var req,res, next, spy;
           req = res = next = {};
           req.param = function(v) {return v;};
           spy = res.json = sinon.spy();      
           group.actions['post /group/create'](req, res, next);
           expect(spy.calledOnce);         
           assert(spy.args[0][0].status === 'error');
           
       });
       
       it("should generate success", function() {
       Model.group = {
    create: function(group, callback) {callback(false, {name:'test'})}
       };
       
       Model.user = {
    findOne: function(user, callback) {callback(false, {groups:[], save: function(){}})}
    };
       
           var req,res, next, spy;
           req = res = next = {};
           req.param = function(v) {return v;};
           spy = res.json = sinon.spy();      
           group.actions['post /group/create'](req, res, next);
           expect(spy.calledOnce);         
           assert(spy.args[0][0].status === 'success');
           
       });

   });
});


The wonderful thing about the dynamic programming language like javascript is that the functions are properties of object and passed around as reference. Note latest Java 8's lambda expression also has function reference. To create stub becomes extreme easy as demonstrated above. 

Happy coding! 

Wednesday, January 28, 2015

Weighted Job Scheduling

Here is the problem description from GeeksforGeeks:

Given N jobs where every job is represented by following three elements of it.
1) Start Time
2) Finish Time.
3) Profit or Value Associated.
Find the maximum profit subset of jobs such that no two jobs in the subset overlap.
Example:
Input: Number of Jobs n = 4
       Job Details {Start Time, Finish Time, Profit}
       Job 1:  {1, 2, 50} 
       Job 2:  {3, 5, 20}
       Job 3:  {6, 19, 100}
       Job 4:  {2, 100, 200} 
Output: The maximum profit is 250.

This is so much similar to the 0-1 knapsack problem.  So we need to find the dynamic function, first of all, we sort the job by end time, then for each job k, DP(k) indicate the maximum profit which includes job[k], the final maximum profit should be the maximum value of DP(k)|0<=k<n.


 public class Job implements Comparable<Job>{
int start, end, profit;
public int compareTo(Job other){
return this.end - other.end;
}
}
public int maxProfit(Job[] jobs){
//table[i] store the max profit including ith job
int[] table = new int[jobs.length];
Arrays.sort(jobs);
table[0] = jobs[0].profit;
for(int k = 1; k<jobs.length; k++){
int max = Integer.MIN_VALUE;
int m = binarySearch(jobs, 0, k-1, jobs[k].start);
while(m>=0)
max = Math.max(max, table[m--]);
table[k] = max + jobs[k].profit;
}
int max = Integer.MIN_VALUE;
for(int k=0; k<jobs.length; k++){
max = Math.max(table[k], max);
}
return max;
}
public int binarySearch(Job[] jobs, int start, int end, int k){
if(jobs[start].end > k)
return start-1;
while(start<end-1){
int mid = (start+end)>>>1;
if(jobs[mid].end > k)
end = mid;
else
start = mid;
}
return jobs[end].end<=k?end:start;
}



Tuesday, January 27, 2015

Order statistics binary search tree

In the past few days, Peng Li (http://allenlipeng47.com/PersonalPage/index/view/117/nkey) and I have embargoed a journey in the Trees (aka forest). Tree is a wonderful data structure, probably the most important data structure along with HashTable. In the last few blogs I talked about "Segment Tree" and its implementation. I also talked about "Skip List", which is the alternative to binary tree.

Now here is one more! Order statistics binary search tree. This is probably one of the simplest high order binary search tree. It is just like binary search tree, except we store additional information in the node, usually this additional information can be computed recursively and bubble-up approach by taking advantaging of the tree's inherent characteristics. 

For example, we can store the size of subtree rooted at each node. 

Node.size = Node.left.size + Node.right.size

Once we have that data, then we can support the following two operations in O(lgN) time.

Select(i) — find the i'th smallest element stored in the tree

Rank(x) – find the rank of element x in the tree, i.e. its index in the sorted list of elements of the tree

public class OrderStatisticTreeQ {
public class Node {
int data;
int size;
Node left;
Node right;
}

/*Select(i) — find the i'th smallest element stored in the tree
Rank(x) – find the rank of element x in the tree, i.e. its index in the sorted list of elements of the tree
*/
//k starts at 0
public int findKthSmallest(Node root, int k){
int level = k;
Node curr = root;
while(curr!=null){
int leftSize = (curr.left ==null? 0 : curr.left.size);
  if(level == leftSize )
return curr.data;
  if(leftSize > level)
curr = curr.left;
  else{
curr = curr.right;
level = level - (leftSize+1);
  }
}
return -1;
}

//index starts at 0
public int findIndexFor(Node root, int value){
Node curr = root;
int passed = 0;
while(curr!=null){
if(curr.data == value){
return (curr.left==null?0:curr.left.size) + passed;
}else if(curr.data > value){
curr = curr.left;
}else{
passed += (curr.left==null?0:curr.left.size) + 1;
curr = curr.right;
}
}
return -1;
}
}

Monday, January 26, 2015

Maximum sum range query using segment tree

There is a classic SPOJ problem which can be solved by segment tree. Here is the link to the problem.
http://www.spoj.com/problems/GSS3/

Basically it asks for any given input of numbers, find the maximum sum in a range (x, y| x<=y), the sum is defined as (A[i]+ A[i+1] + ... + A[j]) where x<=i<=j<=y.

To better understand the following solution, I suggest you read the two blogs before you read the below solutions. : http://blueocean-penn.blogspot.com/2015/01/segment-tree-order-statistics-binary.html and http://blueocean-penn.blogspot.com/2015/01/find-diameter-for-any-tree.html

Please note the maximum sum subarray problem can be solved in linear time like this:
public int maximumSum(int[] nums, int i, int j){

int currSum = nums[i], maxSum = nums[i];
for(int k = i+1; k<=j; k++){
 currSum = Math.max(nums[k], currSum + nums[k]);
 maxSum = Math.max(maxSum, currSum);
}

return maxSum;
}

/* at each node we store 5 piece of info
* data: the input number if at leaf level
* sum: the sum of the all elements at the interval represented by the current node
* prefixSum: the maximum sum of the elements starting from left bound of the current interval
* postfixSum: the maximum sum of the elements ending at right bound of current interval
* bestSum: the maximum sum of the subarrays at current interval
*/

example:

2 --------------------------------- 10
|<------prefixSum ....

2 --------------6 7-----------------10
|<--prefixSum..   |<--prefixSum
|<--- Sum ----->|

prefixSum(2,10) = Max(prefixSum(2,6), Sum(2,6) + prefixSum(7,10)
similar to postfixSum


2 --------------------------------- 10
       |<------BestSum----->|

2 --------------6 7-----------------10
 |<--BestSum-->|   |<--BestSum-->|
 ...postfixSum-->||<--prefixSum

BestSum(2,10) = Max(BestSum(2,6), BestSum(7,10), postfixSum(2,6)+prefixSum(7,10))



public class Node {
int data;
int sum;
int prefixSum;
int postfixSum;
int bestSum;
}
public class SegmentTree {
Node[] tree;
int len;
public SegmentTree(int[] nums){
len = nums.length;
int x = (int) Math.ceil(Math.log10(len)/Math.log10(2));
tree = new Node[(int) (Math.pow(2, x)*2-1)];
constructTree(0, nums, 0, nums.length-1);
}
//Time Complexity for tree construction is O(n). There are total 2n-1 nodes, and value of //every node is calculated only once in tree construction.
private Node constructTree(int current, int[] nums, int start, int end){
if(start==end){
tree[current] = new Node();
tree[current].data = tree[current].sum = tree[current].prefixSum 
= tree[current].postfixSum = tree[current].bestSum = nums[start];
}else{
int left = current*2+1;
int right = current*2+2;
int mid = start + (end-start)/2;
Node l = constructTree(left, nums, start, mid), 
r = constructTree(right, nums, mid+1, end);
 tree[current] = new Node();
tree[current].sum = l.sum + r.sum;
tree[current].prefixSum = Math.max(l.prefixSum, l.sum + r.prefixSum);
tree[current].postfixSum = Math.max(r.postfixSum, l.postfixSum + r.sum);
tree[current].bestSum = Math.max(Math.max(l.bestSum, r.bestSum), l.postfixSum+r.prefixSum);
}
return tree[current];
}
//Time complexity to query is O(lgN). To query a range max sum, we process at most 
//4 nodes at every level and number of levels is O(lgN). 
public int getMaxSum(int l, int r){
Node result =  getMaxSumRec(l, r, 0, 0, len-1);
return result==null? Integer.MIN_VALUE : result.bestSum;
}
private Node getMaxSumRec(int l, int r, int index, int start, int end){
if(l<=start && r>=end)
return tree[index];
else if(end<l || start>r){
return null;
}else{
int mid = start + (end-start)/2;
Node left = getMaxSumRec(l, r, 2*index+1, start, mid), 
right = getMaxSumRec(l, r, 2*index+2, mid+1, end);
if(left==null)
return right;
else if(right==null)
return left;
else{
Node result = new Node();
result.sum = left.sum + right.sum;
result.prefixSum = Math.max(left.prefixSum, left.sum + right.prefixSum);
result.postfixSum = Math.max(right.postfixSum, left.postfixSum + right.sum);
result.bestSum = Math.max(Math.max(left.bestSum, right.bestSum), left.postfixSum+right.prefixSum);
return result;
}
}
}
}

}

Segment tree - an order statistics binary tree

Binary tree like binary search tree is useful for many operations: search any element, min or max, update, delete, they all have time complexity of O(lgN). However in many high order statistics like range query, like find max/min/sum in the range of index from i to j in O(lgN) time, the order statistics binary trees are often used. Segment tree is one of them.

Segment tree is a binary tree which stores all original elements are leaves in sequential order of their indices, and internal nodes of the tree store the aggregate information of their children. The tree is often represented as binary heap-like data structure. Node i's left children is 2*i+1, right children is 2*i+2. The height of the tree is Ceiling(lg(SIZE_OF_INPUT)). For example, array of 8 elements (0, 1, ..., 7), its height will be 3, array of 6 elements, its height will be 3 too.  So the heap storage size will be (1 + 2 + 4 + .. + 2^height) = 2^(height+1)-1. Note the size of the tree is 2*SIZE_OF_INPUT-1.

The nice thing about segment tree is that the operation on it: search, add, update is very similar to binary heap's bubble down operation, merge operation is like heap's bubble up. The index property of parent and children are exactly like binary heap.


/*
* a segment tree representation using array
* note this supports min query only, but can be adopted to other types like sum, max and etc.
*/
public class SegmentTree {
int[] tree;
int len;
public SegmentTree(int[] nums){
len = nums.length;
int x = (int) Math.ceil(Math.log10(len)/Math.log10(2));
tree = new int[(int) (Math.pow(2, x)*2-1)];
constructTree(0, nums, 0, nums.length-1);
}
//Time Complexity for tree construction is O(n). There are total 2n-1 nodes, and value of //every node is calculated only once in tree construction.
private int constructTree(int current, int[] nums, int start, int end){
if(start==end){
tree[current] = nums[start];
}else{
int left = current*2+1;
int right = current*2+2;
int mid = start + (end-start)/2;
tree[current] = Math.min(constructTree(left, nums, start, mid), constructTree(right, nums, mid+1, end));
}
return tree[current];
}
//Time complexity to query is O(Logn). To query a range minimum, we process at most 
//two nodes at every level and number of levels is O(Logn). 
public int getMin(int l, int r){
return getMinRec(l, r, 0, 0, len-1);
}
private int getMinRec(int l, int r, int index, int start, int end){
if(l<=start && r>=end)
return tree[index];
else if(end<l || start>r){
return Integer.MAX_VALUE;
}else{
int mid = start + (end-start)/2;
return Math.min(getMinRec(l, r, 2*index+1, start, mid), getMinRec(l, r, 2*index+2, mid+1, end));
}
}

public void update(int index, int v){
   update(index, v, 0, 0, len-1);

}

//bubble down

private int update(int index, int v, int treeIndex, int start, int end){
   if(index<start || index >end)
return tree[treeIndex];
 
   if(index == start && index == end){
tree[treeIndex] = v;
return v;
  }
         
  int mid = start + (end-start)/2;
tree[treeIndex] = Math.min(update(index, v, 2*treeIndex+1, start, mid), update(index, v, 2*treeIndex+2, mid+1, end));      
return tree[treeIndex];
       
}
}

The segment tree is a static data structure, that is, in order to construct the tree the original input has to be known first. However it can be used to solve on-line or streaming problem. For example, we can initialized the tree to a large size with values for any future incoming elements are set to default values. For example, for min query, we can set the default values to Integer.MIN_VALUE. In this way, any insertion becomes update operation. Of course the drawback of this implementation is that we are wasting some space in the tree implementation.


Friday, January 23, 2015

Skip List - a probabilistic data structure as alternative to balanced search trees

Binary search trees can be great used for search in O(logN) time. However the problem with it is that the tree needs to be balanced otherwise the search can be degenerated to a linear search on a linked list. Balanced search tree like red-back tree or splay tree can help to maintain the tree balanced. However the extra effort and caution have to be taken to make it work. Skip lists are about as fast as highly optimized balanced tree and substantially faster than casually implemented binary search tree.

Skip list is a probabilistic data structure in the way that level of each node is randomly choose as a number below the predefined maximum height.


class Node {
        int data;
Node[] next;
        //l is randomly generated number between 1 and max height
public Node(int l){
this.next = new Node[l];
}
}


Here is one simple coin-flipping algorithm to generate the number:

public int randomLevel(int max){
int level = 1;
while(Math.random()<0.5 && level < max)
level++;
return level;
}

another efficient way to do that and we assume the max <= 32:


public int randHeight(int max){
 int random = new Random().nextInt(); //generate int with 32 bits
 boolean found = false;
 int h = 1;
 while(!found && random > 0){
 if( (random & 1) > 0)
found = true;
 else{
h++;
random = random >>1;
 }
 }
 return Math.max(h, max-1);
}

And search is to compare the next key's value with search value, simply go down the level until we hit bottom level.

//return the value itself if found otherwise -1
public int find(Node head, int v){
Node curr = head;
for(int l = level-1; l>=0; l--){
for(; curr.next[l]!=null && curr.next[l].data<v; curr = curr.next[l]){}
}
return curr.next[0]!=null && curr.next[0].data == v? curr.next[0].data : -1;
}

Monday, January 19, 2015

Minimum cost of sorting an array of positive numbers

This is a Facebook interview problem which can be found at http://www.careercup.com/question?id=9333968.

Here is the problem statement from the site:
Given an array A of positive integers. Convert it to a sorted array with minimum cost. The only valid operation are: 
1) Decrement with cost = 1 
2) Delete an element completely from the array with cost = value of element

Most of array problem can be solved by 1) dynamic programming which usually has time complexity of O(n^2) or above, 2) sliding window which has time complexity of O(n). Then mixing with these two approach there will be various data structures or algorithms can be used, notably, binary search, stack, queue.

My first try is greedy algorithm:

Start from index at 0, using a sliding window, keep on expanding the window until find a the number which is larger than the previous number.
note the numbers in sliding window are sorted except the last one, supposed its value is k
using binary search to find the minimum index of the item larger than k in the sliding windows
now to make the sliding window completely sorted, we have two choices: 1) delete the last one, 2) decrease all of the items from the minimum index to end of sliding window to k. We can calculate the costs of these two choices, choose the one is minimum and then make change to the array

now we can expanding the sliding window again and repeat the above steps until reaching the end of array

However the above solution is not quite right, per Peng Li's comment below.

My second try is dynamic programming:

The idea is that the final sorted array will always end up with one of the original numbers appear in the array. then all of the larger numbers before this number will be decremented,  
dp(i, j), i is the index of the original array a[], j is the index of the sorted array b[] of unique numbers in the array 
 dp(i, j) = min(dp(i-1, 0), dp(i-1, 2), ... dp(i-1, j)) + cost_of_convert(a[i], b[j])

Final answer: min(dp(a.len-1, 0), dp(a.len-1, 1)..., dp(a.len-1, b.len-1)

public static int minSortCostDP(int[] nums){
Set<Integer> unique = new HashSet<Integer>();
for(int i:nums)
unique.add(i);
Set<Integer> sortedSet = new TreeSet<Integer>(unique);
Integer[] sorted = (Integer[]) sortedSet.toArray();
int lenA = nums.length,  lenB = sorted.length;
int[][] table = new int[lenA][lenB];
//first row
for(int col = 0; col<lenB; col++)
table[0][col] = Math.max(0, nums[0]-sorted[col]);
for(int row = 1; row<lenA; row++){
 for(int col = 0; col<lenB; col++){
  //either deletion or decrement
  int cost_of_convert = nums[row]>=sorted[col]? nums[row]-sorted[col]:nums[row];
  table[row][col] = Integer.MAX_VALUE;
  for(int k=0; k<=col; k++){
table[row][col] = Math.min(table[row-1][k]+cost_of_convert, table[row][col]);
  }
 }
}
int minCost = Integer.MAX_VALUE;
//last row
for(int col = 0; col<lenB; col++){
minCost = Math.min(table[lenA-1][col], minCost);
}
return minCost;
}


//Greedy algorithm approach which is not quite right
package org.blueocean;

public class FacebookQ {
//http://www.careercup.com/question?id=9333968
//find the min index of the item in array which large than k
public static int binarySearch(int[] nums, int start, int end, int k){
while(end-start>1){
int mid = (end+start)>>>1;
if(nums[mid]>k)
end = mid;
else
start = mid;
}
return nums[start]>k? start : end;
}
//greedy algorithm
public static int minSortCost(int[] nums){
int cost = 0;
int start = 0;
int end = 1;
int len = nums.length;
while(end<len){
while(end<len && nums[end]>=nums[end-1]){
end++;
}
if(end<len){
int index = binarySearch(nums, start, end-1, nums[end]);
int cost1 = nums[end];//deletion
int cost2 = 0;
for(int j = index; j<=end-1; j++)
cost2 += nums[j]-nums[end];//decrement
if(cost1<=cost2){
//delete item at index of end by shifting the rest of numbers after end
System.arraycopy(nums, end+1, nums, end, len-1-end);
len--;
cost += cost1;
}else{
for(int j = index; j<=end-1; j++)
nums[j] = nums[end];
cost += cost2;
}
}
end++;
}
return cost;
}

}



Wednesday, January 14, 2015

Total length of overlapping intervals


The original question is here: http://stackoverflow.com/questions/14522808/find-the-total-length-of-overlapped-intervals-using-segment-tree

My implementation takes O(n), but if this query is run multiple times and data structure needs to support dynamic insert and delete, O(n) seems not good enough. Is there any O(logN) solution?

public class Interval implements Comparable<Interval>{
int left, right;
public Interval(int l, int r){
this.left = l; this.right = r;
}
@Override
public int compareTo(Interval i){
return this.left - i.left;
}
public boolean overlap(Interval inter){
return this.left <= inter.right && this.right >= inter.left
}
}
/*
 * this implementation assuming static input, use array to present the ordered intervals
 * for dynamically input such as add/delete on the fly, 
* we can use balanced interval tree, then in-order travel to process the tree
* storage O(n)
* query time O(n) 
* construction: O(nlog(n))
* insertion: o(n)
*/
public int totalLength(Interval[] pairs){
//sort by the left corr
Arrays.sort(pairs);
List<Interval> merged = new ArrayList<Interval>();
merged.add(pairs[0]);
for(int i=1; i<pairs.length; i++){
int size = merged.size();
Interval lastInter = merged.get(size-1);
if(lastInter.overlap(pairs[i])){
lastInter.right = Math.max(lastInter.right, pairs[i].right);
}else{
merged.add(pairs[i]);
}
}
int sum = 0;
for(Interval inter : merged){
sum += inter.right - inter.left;
}
return sum;
}