Revisions to Diff Two Arrays - return the symmetric difference of the two arrays

few changes to take into account possibility of repeated elements in arr2

Source Link

edited Apr 7, 2016 at 15:13

1.1k
8
11

function diffArray(arr1, arr2) {
  var newArr = [];

  arr1.map(function(val) {
    arr2.indexOf(val) < 0 ? newArr.push(val) : '';
  });

  arr2.map(function(val) {
    arr1.indexOf(val) < 0 ? newArr.push(val) : '';
  });

  return newArr;
}


function diffArray2(arr1, arr2) {
    seen = {}
  var newArr = []
  arr1.forEach(function(val) {
    seen[val] = true1 // we saw it on first array   
  })
  arr2.forEach(function(val) {
    if (seen[val] == 1) {
  // we already saw it on the deletefirst one, unmark
      seen[val] = false
    } else if (seen.hasOwnProperty(seen[val]) == false) {
  // if we hadnt seen it earlier
      seen[val] = true2 // mark with a 2
    }
  })
  
  for (var val in seen) {
    if (seen[val]) { // if its a 1 or a 2, it was unique
      newArr.push(val)
    }
  }
  return newArr
}

var arr1 = []
var arr2=[]arr2 = []
var mx = 10000;
for (var i=0;i<mx;i++i = 0; i < mx; i++) {
    arr1.push(i)
  arr2.push(i+Mathi + Math.round(mx / 2))
}

console.time("diffArray with arrays and .index")
diffArray(arr1, arr2);
console.timeEnd("diffArray with arrays and .index")


console.time("diffArray with object")
diffArray2(arr1, arr2);
console.timeEnd("diffArray with object")

function diffArray(arr1, arr2) {
 var newArr = [];

  arr1.map(function(val){
   arr2.indexOf(val) < 0 ? newArr.push(val) : '';
  });

  arr2.map(function(val){
   arr1.indexOf(val) < 0 ? newArr.push(val) : '';
  });

  return newArr;
}


function diffArray2(arr1,arr2){
    seen = {}
  var newArr = []
  arr1.forEach(function(val){
    seen[val] = true    
  })
  arr2.forEach(function(val){
    if(seen[val]){
         delete seen[val]
    }else{
         seen[val] = true
    }
  })
  
  for(var val in seen){
    newArr.push(val)
  }
    return newArr
}

var arr1 = []
var arr2=[]
var mx = 10000;
for(var i=0;i<mx;i++){
    arr1.push(i)
  arr2.push(i+Math.round(mx/2))
}

console.time("diffArray with arrays and .index")
diffArray(arr1,arr2);
console.timeEnd("diffArray with arrays and .index")


console.time("diffArray with object")
diffArray2(arr1,arr2);
console.timeEnd("diffArray with object")

function diffArray(arr1, arr2) {
  var newArr = [];

  arr1.map(function(val) {
    arr2.indexOf(val) < 0 ? newArr.push(val) : '';
  });

  arr2.map(function(val) {
    arr1.indexOf(val) < 0 ? newArr.push(val) : '';
  });

  return newArr;
}


function diffArray2(arr1, arr2) {
  seen = {}
  var newArr = []
  arr1.forEach(function(val) {
    seen[val] = 1 // we saw it on first array   
  })
  arr2.forEach(function(val) {
    if (seen[val] == 1) { // we already saw it on the first one, unmark
      seen[val] = false
    } else if (seen.hasOwnProperty(seen[val]) == false) { // if we hadnt seen it earlier
      seen[val] = 2 // mark with a 2
    }
  })

  for (var val in seen) {
    if (seen[val]) { // if its a 1 or a 2, it was unique
      newArr.push(val)
    }
  }
  return newArr
}

var arr1 = []
var arr2 = []
var mx = 10000;
for (var i = 0; i < mx; i++) {
  arr1.push(i)
  arr2.push(i + Math.round(mx / 2))
}

console.time("diffArray with arrays and .index")
diffArray(arr1, arr2);
console.timeEnd("diffArray with arrays and .index")


console.time("diffArray with object")
diffArray2(arr1, arr2);
console.timeEnd("diffArray with object")

corrected spelling

Source Link

edited Apr 5, 2016 at 17:59

juvian

1.1k
8
11

As for performance: Array.indexOf in the worse case(element is not there), will have to check all values of the array to check if the valusvalue is in it. And doing this for all values of the second array, means O(n * m) where n is the arr1 size and m is the arr2 size. If n = m, we can consider that it takes O(n*n), which is quite costlyexpensive. You could improve this by orderingsorting both arrays and doing a smarter check, but even that will have the cost of the sorting, which is O(n * log n) with a good implementation

Having said all that, what you really want is a structure to check if you have seen or not an element that can check it really quick. In javascript, we can use objets to do this. As its native code we don´t really know the complexity of access, but with testing a bit it is clear that it´s much better (in theory access should be O(1) or O(log n))

The pseudo code is like this:

var seen = {}
iterate all map1 values and mark is as seen
iterate all map2 values and unmark if already seen, mark if not
add all seen values to new array
return array

And here is a full fiddle:

function diffArray(arr1, arr2) {
 var newArr = [];

  arr1.map(function(val){
   arr2.indexOf(val) < 0 ? newArr.push(val) : '';
  });

  arr2.map(function(val){
   arr1.indexOf(val) < 0 ? newArr.push(val) : '';
  });

  return newArr;
}


function diffArray2(arr1,arr2){
    seen = {}
  var newArr = []
  arr1.forEach(function(val){
    seen[val] = true    
  })
  arr2.forEach(function(val){
    if(seen[val]){
        delete seen[val]
    }else{
        seen[val] = true
    }
  })
  
  for(var val in seen){
    newArr.push(val)
  }
    return newArr
}

var arr1 = []
var arr2=[]
var mx = 10000;
for(var i=0;i<mx;i++){
    arr1.push(i)
  arr2.push(i+Math.round(mx/2))
}

console.time("diffArray with arrays and .index")
diffArray(arr1,arr2);
console.timeEnd("diffArray with arrays and .index")


console.time("diffArray with object")
diffArray2(arr1,arr2);
console.timeEnd("diffArray with object")

As for performance: Array.indexOf in the worse case(element is not there), will have to check all values of the array to check if the valus is in it. And doing this for all values of the second array, means O(n * m) where n is the arr1 size and m is the arr2 size. If n = m, we can consider that it takes O(n*n), which is quite costly. You could improve this by ordering both arrays and doing a smarter check, but even that will have the cost of the sorting, which is O(n * log n) with a good implementation

Having said all that, what you really want is a structure to check if you have seen or not an element that can check it really quick. In javascript, we can use objets to do this. As its native code we don´t really know the complexity of access, but with testing a bit it is clear that it´s much better (in theory access should be O(1) or O(log n))

The pseudo code is like this:

var seen = {}
iterate all map1 values and mark is as seen
iterate all map2 values and unmark if already seen, mark if not
add all seen values to new array
return array

And here is a full fiddle:

function diffArray(arr1, arr2) {
 var newArr = [];

  arr1.map(function(val){
   arr2.indexOf(val) < 0 ? newArr.push(val) : '';
  });

  arr2.map(function(val){
   arr1.indexOf(val) < 0 ? newArr.push(val) : '';
  });

  return newArr;
}


function diffArray2(arr1,arr2){
    seen = {}
  var newArr = []
  arr1.forEach(function(val){
    seen[val] = true    
  })
  arr2.forEach(function(val){
    if(seen[val]){
        delete seen[val]
    }else{
        seen[val] = true
    }
  })
  
  for(var val in seen){
    newArr.push(val)
  }
    return newArr
}

var arr1 = []
var arr2=[]
var mx = 10000;
for(var i=0;i<mx;i++){
    arr1.push(i)
  arr2.push(i+Math.round(mx/2))
}

console.time("diffArray with arrays and .index")
diffArray(arr1,arr2);
console.timeEnd("diffArray with arrays and .index")


console.time("diffArray with object")
diffArray2(arr1,arr2);
console.timeEnd("diffArray with object")

As for performance: Array.indexOf in the worse case(element is not there), will have to check all values of the array to check if the value is in it. And doing this for all values of the second array, means O(n * m) where n is the arr1 size and m is the arr2 size. If n = m, we can consider that it takes O(n*n), which is quite expensive. You could improve this by sorting both arrays and doing a smarter check, but even that will have the cost of the sorting, which is O(n * log n) with a good implementation

Having said all that, what you really want is a structure to check if you have seen or not an element that can check it really quick. In javascript, we can use objets to do this. As its native code we don´t really know the complexity of access, but with testing a bit it is clear that it´s much better (in theory access should be O(1) or O(log n))

The pseudo code is like this:

var seen = {}
iterate all map1 values and mark is as seen
iterate all map2 values and unmark if already seen, mark if not
add all seen values to new array
return array

And here is a full fiddle:

function diffArray(arr1, arr2) {
 var newArr = [];

  arr1.map(function(val){
   arr2.indexOf(val) < 0 ? newArr.push(val) : '';
  });

  arr2.map(function(val){
   arr1.indexOf(val) < 0 ? newArr.push(val) : '';
  });

  return newArr;
}


function diffArray2(arr1,arr2){
    seen = {}
  var newArr = []
  arr1.forEach(function(val){
    seen[val] = true    
  })
  arr2.forEach(function(val){
    if(seen[val]){
        delete seen[val]
    }else{
        seen[val] = true
    }
  })
  
  for(var val in seen){
    newArr.push(val)
  }
    return newArr
}

var arr1 = []
var arr2=[]
var mx = 10000;
for(var i=0;i<mx;i++){
    arr1.push(i)
  arr2.push(i+Math.round(mx/2))
}

console.time("diffArray with arrays and .index")
diffArray(arr1,arr2);
console.timeEnd("diffArray with arrays and .index")


console.time("diffArray with object")
diffArray2(arr1,arr2);
console.timeEnd("diffArray with object")

Source Link

answered Apr 5, 2016 at 17:50

juvian

1.1k
8
11

As for performance: Array.indexOf in the worse case(element is not there), will have to check all values of the array to check if the valus is in it. And doing this for all values of the second array, means O(n * m) where n is the arr1 size and m is the arr2 size. If n = m, we can consider that it takes O(n*n), which is quite costly. You could improve this by ordering both arrays and doing a smarter check, but even that will have the cost of the sorting, which is O(n * log n) with a good implementation

Having said all that, what you really want is a structure to check if you have seen or not an element that can check it really quick. In javascript, we can use objets to do this. As its native code we don´t really know the complexity of access, but with testing a bit it is clear that it´s much better (in theory access should be O(1) or O(log n))

The pseudo code is like this:

var seen = {}
iterate all map1 values and mark is as seen
iterate all map2 values and unmark if already seen, mark if not
add all seen values to new array
return array

And here is a full fiddle:

function diffArray(arr1, arr2) {
 var newArr = [];

  arr1.map(function(val){
   arr2.indexOf(val) < 0 ? newArr.push(val) : '';
  });

  arr2.map(function(val){
   arr1.indexOf(val) < 0 ? newArr.push(val) : '';
  });

  return newArr;
}


function diffArray2(arr1,arr2){
    seen = {}
  var newArr = []
  arr1.forEach(function(val){
    seen[val] = true    
  })
  arr2.forEach(function(val){
    if(seen[val]){
        delete seen[val]
    }else{
        seen[val] = true
    }
  })
  
  for(var val in seen){
    newArr.push(val)
  }
    return newArr
}

var arr1 = []
var arr2=[]
var mx = 10000;
for(var i=0;i<mx;i++){
    arr1.push(i)
  arr2.push(i+Math.round(mx/2))
}

console.time("diffArray with arrays and .index")
diffArray(arr1,arr2);
console.timeEnd("diffArray with arrays and .index")


console.time("diffArray with object")
diffArray2(arr1,arr2);
console.timeEnd("diffArray with object")

Stack Exchange Network

Return to Answer