Add force and mds method to dim_reduction
1. Add force and mds method to dim_reduction
2. Fix a bug about assert
suquark committed Feb 1, 2017
1 parent 36c6e6d commit 934b841
Showing 5 changed files with 288 additions and 23 deletions.
150 changes: 150 additions & 0 deletions dim_reduction/force.js
@@ -0,0 +1,150 @@
/**
* Force field
*/

import { getopt } from 'util.js';
import { assertArray2D } from 'util/assert.js';
import { randn2d } from 'util/random.js';
import { centerPoints, zeros2d, adjMatrixDistance, distance, L2 } from 'util/array.js';
import { Adam } from 'optimizer/index.js'; // you can drop `index.js` if supported

/**
* @param {?Object} opt Options.
* @constructor
*/
class ForceField {
constructor(opt={}) {
this.dim = getopt(opt, 'dim', 2); // by default 2-D
this.epsilon = getopt(opt, 'epsilon', 1); // learning rate
this.nn = getopt(opt, 'nn', 3); // number of nearest neighbors to attach springs to (<= 0 means all pairs)

this.iter = 0;
}

// this function takes a set of high-dimensional points
// and computes the pairwise distance matrix D from them
initDataRaw(X) {
assertArray2D(X);
var dists = adjMatrixDistance(X);
this.initDataDist(dists);
}

// this function takes an N x N distance matrix D and stores it.
// D is assumed to be provided as an array of N arrays of length N.
initDataDist(D) {
var N = D.length;
this.D = D;
this.N = N; // back up the size of the dataset
this.initSolution(); // refresh this
}

// (re)initializes the solution to random
initSolution() {
this.Y = randn2d(this.N, this.dim, 0.0, 1e-4); // the solution
for (let i in this.Y) {
this.Y[i].optimizer = new Adam(this.Y[i].length, { learning_rate: this.epsilon }); //new SpecialSGD(this.Y[i].length, { learning_rate: this.epsilon });
}
this.iter = 0;
}

// return pointer to current solution
get solution() { return this.Y; }


// perform a single step of optimization to improve the embedding
step(calc_cost = true) {
this.iter += 1;
let N = this.N;

let cg = this.costGrad(this.Y, calc_cost); // evaluate gradient
let cost = cg.cost;
let grad = cg.grad;

// perform gradient step
for (let i = 0; i < N; i++) {
this.Y[i].optimizer.update(this.Y[i], grad[i]);
}

// reproject Y to be zero mean
centerPoints(this.Y);

return cost; // return current cost
}

/**
* return cost and gradient, given an arrangement
*
* For nn <= 0 (springs between all pairs) the energy is
*
* E = \frac{1}{\sum\limits_{i<j}d^{*}_{ij}}\sum_{i<j}\left[\frac{1}{d_{ij}} + \frac{1}{2}(d^{*}_{ij}-d_{ij})^2\right]
*
* a pairwise repulsion plus springs pulling each pair towards its target distance d^{*}_{ij}.
*/
costGrad(Y, calc_cost=true) {
let D = this.D;
let N = this.N;
let dim = this.dim;
let grad = zeros2d(N, dim);
for (let i = 0; i < N; i++) {
for (let j = i + 1; j < N; j++) {
let k = -1.0 / L2(Y[i], Y[j]); // repulsive force between each pair, weakening with distance
for (let d = 0; d < dim; d++) {
let dx = Y[i][d] - Y[j][d];
grad[i][d] += k * dx;
grad[j][d] -= k * dx;
}
}
}

// calc cost
let sum = 0.; // total target distance; normalizes the cost
let cost = 0.;
if (calc_cost) {
for (let i = 0; i < N; i++) {
for (let j = i + 1; j < N; j++) {
sum += D[i][j];
cost += 1 / (distance(Y[i], Y[j]) + 1e-8); // repulsion energy; the spring energy is added below
}
}
}

/////////////////

if (this.nn <= 0) { // then calc all pairs

for (let i = 0; i < N; i++) {
for (let j = i + 1; j < N; j++) {
let Dij = D[i][j];
let dij = distance(Y[i], Y[j]);
let k = (dij - Dij) / (dij + 1e-8);
for (let d = 0; d < dim; d++) {
let dx = Y[i][d] - Y[j][d];
grad[i][d] += k * dx;
grad[j][d] -= k * dx;
}
}
}

if (calc_cost) { // spring energy between all pairs
for (let i = 0; i < N; i++) {
for (let j = i + 1; j < N; j++) {
let Dij = D[i][j];
let dij = distance(Y[i], Y[j]);
let Dd = Dij - dij;
cost += 0.5 * (Dd * Dd);
}
}
}
} else { // calc knn edges
// TODO: springs restricted to the `nn` nearest neighbors are not implemented yet;
// with nn > 0 only the repulsion force is applied.
}

return { grad: grad, cost: calc_cost ? cost / sum : 0 };
}
}

export { ForceField };
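A minimal usage sketch for the new class (illustrative only — the toy data, option values, and loop counts are assumptions, not part of the commit):

```js
import { ForceField } from 'dim_reduction/force.js';

// Toy high-dimensional points; any array of equal-length arrays works.
let X = [[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 1, 1]];

let ff = new ForceField({ dim: 2, epsilon: 1, nn: 0 }); // nn <= 0: springs between all pairs
ff.initDataRaw(X); // builds the pairwise distance matrix D

for (let k = 0; k < 500; k++) {
  let cost = ff.step(k % 100 === 0); // compute the normalized cost only every 100 steps
  if (k % 100 === 0) console.log('iter ' + ff.iter + ', cost ' + cost);
}

let Y = ff.solution; // N x dim embedding, re-centered at the origin after every step
```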
121 changes: 121 additions & 0 deletions dim_reduction/mds.js
@@ -0,0 +1,121 @@
/**
* Multidimensional scaling
*/

import { getopt, assert } from 'util.js';
import { assertArray2D } from 'util/assert.js';
import { randn2d } from 'util/random.js';
import { centerPoints, zeros2d, adjMatrixDistance, distance } from 'util/array.js';
import { Adam } from 'optimizer/index.js'; // you can drop `index.js` if supported

/**
* Multidimensional scaling
* @param {?Object} opt Options.
* @constructor
*/
class MDS {
constructor(opt={}) {
this.dim = getopt(opt, 'dim', 2); // by default 2-D
this.epsilon = getopt(opt, 'epsilon', 1); // learning rate
this.iter = 0;
}

// this function takes a set of high-dimensional points
// and computes the pairwise distance matrix D from them
initDataRaw(X) {
assertArray2D(X);
var dists = adjMatrixDistance(X);
this.initDataDist(dists);
}

// this function takes an N x N distance matrix D and stores it.
// D is assumed to be provided as an array of N arrays of length N.
initDataDist(D) {
var N = D.length;
this.D = D;
this.N = N; // back up the size of the dataset
this.initSolution(); // refresh this
}

// (re)initializes the solution to random
initSolution() {
this.Y = randn2d(this.N, this.dim, 0.0, 1e-4); // the solution
for (let i in this.Y) {
this.Y[i].optimizer = new Adam(this.Y[i].length, { learning_rate: this.epsilon }); //new SpecialSGD(this.Y[i].length, { learning_rate: this.epsilon });
}
this.iter = 0;
}

// return pointer to current solution
get solution() { return this.Y; }


// perform a single step of optimization to improve the embedding
step(calc_cost = true) {
this.iter += 1;
let N = this.N;

let cg = this.costGrad(this.Y, calc_cost); // evaluate gradient
let cost = cg.cost;
let grad = cg.grad;

// perform gradient step
for (let i = 0; i < N; i++) {
this.Y[i].optimizer.update(this.Y[i], grad[i]);
}

// reproject Y to be zero mean
centerPoints(this.Y);

return cost; // return current cost
}

/**
* return cost and gradient, given an arrangement
*
* E = \frac{1}{\sum\limits_{i<j}d^{*}_{ij}}\sum_{i<j}(d^{*}_{ij}-d_{ij})^2.
*
*
*/
costGrad(Y, calc_cost=true) {
let D = this.D;
let N = this.N;
let dim = this.dim;
let grad = zeros2d(N, dim);
for (let i = 0; i < N; i++) {
for (let j = i + 1; j < N; j++) {
let Dij = D[i][j];
let dij = distance(Y[i], Y[j]);
let k = 2.0 * (dij - Dij) / (dij + 1e-8);
for (let d = 0; d < dim; d++) {
let dx = Y[i][d] - Y[j][d];
grad[i][d] += k * dx;
grad[j][d] -= k * dx;
}
}
}
// calc cost
let cost = 0.;
if (calc_cost) {
let sum = 0.; // normalize sum
for (let i = 0; i < N; i++) {
for (let j = i + 1; j < N; j++) {
let Dij = D[i][j];
let dij = distance(Y[i], Y[j]);
sum += Dij;
let Dd = Dij - dij;
cost += Dd * Dd;
}
}
cost /= sum;
}
return { grad: grad, cost: cost };
}
}

export { MDS };
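For reference, the gradient factor `k = 2.0 * (dij - Dij) / (dij + 1e-8)` in costGrad above is the derivative of the unnormalized stress; the normalization by the total target distance applied to the cost is a constant factor, effectively absorbed into the learning rate:

```latex
E' = \sum_{i<j} (d^{*}_{ij} - d_{ij})^2, \qquad
\frac{\partial d_{ij}}{\partial Y_i} = \frac{Y_i - Y_j}{d_{ij}}
\quad\Longrightarrow\quad
\frac{\partial E'}{\partial Y_i}
  = \sum_{j \neq i} 2\,(d_{ij} - d^{*}_{ij})\,\frac{Y_i - Y_j}{d_{ij}}.
```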
23 changes: 10 additions & 13 deletions dim_reduction/sammon.js
@@ -6,16 +6,15 @@ import { getopt, assert } from 'util.js';
import { assertArray2D } from 'util/assert.js';
import { randn2d } from 'util/random.js';
import { centerPoints, zeros2d, adjMatrixDistance, distance } from 'util/array.js';
-import { Adam, SpecialSGD, RMSProp } from 'optimizer/index.js'; // you can drop `index.js` if supported
+import { Adam } from 'optimizer/index.js'; // you can drop `index.js` if supported

/**
* @param {?Object} opt Options.
* @constructor
*/
class SammonMapping {
constructor(opt={}) {
-this.perplexity = getopt(opt, 'perplexity', 30);
-this.dim = getopt(opt, 'dim', 2); // by default 2-D tSNE
+this.dim = getopt(opt, 'dim', 2); // by default 2-D
this.epsilon = getopt(opt, 'epsilon', 1); // learning rate
this.iter = 0;
}
@@ -85,16 +84,14 @@ class SammonMapping {
let grad = zeros2d(N, dim);
for (let i = 0; i < N; i++) {
for (let j = i + 1; j < N; j++) {
-//if ( i!= j ){
-let Dij = D[i][j];
-let dij = distance(Y[i], Y[j]);
-let k = 2.0 * (dij - Dij) / (dij * Dij + 1e-8);
-for (let d = 0; d < dim; d++) {
-let dx = Y[i][d] - Y[j][d];
-grad[i][d] += k * dx;
-grad[j][d] -= k * dx;
-}
-// }
+let Dij = D[i][j];
+let dij = distance(Y[i], Y[j]);
+let k = 2.0 * (dij - Dij) / (dij * Dij + 1e-8);
+for (let d = 0; d < dim; d++) {
+let dx = Y[i][d] - Y[j][d];
+grad[i][d] += k * dx;
+grad[j][d] -= k * dx;
+}
}
}
// calc cost
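For comparison with MDS, the factor `k = 2.0 * (dij - Dij) / (dij * Dij + 1e-8)` kept here is the per-pair derivative of Sammon's stress, which additionally weights each pair by its inverse target distance:

```latex
E = \frac{1}{\sum\limits_{i<j} d^{*}_{ij}}
    \sum_{i<j} \frac{(d^{*}_{ij} - d_{ij})^2}{d^{*}_{ij}}, \qquad
\frac{\partial}{\partial Y_i}\,\frac{(d^{*}_{ij} - d_{ij})^2}{d^{*}_{ij}}
  = \frac{2\,(d_{ij} - d^{*}_{ij})}{d^{*}_{ij}\, d_{ij}}\,(Y_i - Y_j).
```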
13 changes: 5 additions & 8 deletions dim_reduction/tsne.js
@@ -22,7 +22,8 @@
* THE SOFTWARE.
*/

-import { getopt, assert } from 'util.js';
+import { getopt } from 'util.js';
+import { assertArray2D, assertSquare } from 'util/assert.js';
import { randn, randn2d } from 'util/random.js';
import { zeros, array2d, zeros2d, centerPoints, L2, adjMatrixDistance } from 'util/array.js';
import { entropy, norm_dist } from 'util/prob.js';
@@ -44,10 +45,8 @@ class tSNE {
// this function takes a set of high-dimensional points
// and creates matrix P from them using gaussian kernel
initDataRaw(X) {
-var N = X.length;
-var D = X[0].length;
-assert(N > 0, ' X is empty? You must have some data!');
-assert(D > 0, ' X[0] is empty? Where is the data?');
+
+assertArray2D(X);
var dists = adjMatrixDistance(X); // convert X to distances using gaussian kernel
this.initDataDist(dists);
}
@@ -91,9 +90,7 @@ class tSNE {
* perplexity(P) = 2^H(P), the effective neighbor counts
*/
distanceGaussian(D, perplexity, tol) {
-
-assert(D.length === D[0].length, 'D should have square number of elements.');
-let N = D.length;
+let N = assertSquare(D);
var Htarget = Math.log(perplexity); // target entropy of distribution
var P = zeros2d(N, N); // temporary probability matrix

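The docstring above uses perplexity(P) = 2^H(P), the effective number of neighbors, as the target of the per-point search. A small self-contained sketch of that relation (the function name is hypothetical, not part of this library):

```js
// Perplexity of a discrete distribution: perplexity(P) = 2^H(P),
// where H is the Shannon entropy in bits.
function perplexityOf(P) {
  let H = 0;
  for (let p of P) {
    if (p > 1e-12) H -= p * Math.log2(p);
  }
  return Math.pow(2, H);
}

// A uniform distribution over k outcomes has perplexity exactly k:
console.log(perplexityOf([0.25, 0.25, 0.25, 0.25])); // 4
// Concentrating the mass lowers the effective neighbor count:
console.log(perplexityOf([0.97, 0.01, 0.01, 0.01])); // ~1.18
```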
4 changes: 2 additions & 2 deletions util/assert.js
@@ -27,8 +27,8 @@ function assertSquare(array) {
}

function assertArray2D(array) {
-assert(!checkClass(array, 'Array'), 'The object should be an array');
-assert(!checkClass(array[0], 'Array'), 'The object should be an array of array');
+assert(checkClass(array, 'Array'), 'The object should be an array');
+assert(checkClass(array[0], 'Array'), 'The object should be an array of array');
return [array.length, array[0].length];
}

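The flipped assertions above are the "bug about assert" from the commit message: with the negation, assertArray2D threw on every well-formed 2-D array. A self-contained sketch of the corrected behavior (this definition of checkClass is an assumption about its semantics, not the repository's code):

```js
// Assumed semantics: compare the object's Object.prototype.toString tag.
function checkClass(obj, clsname) {
  return Object.prototype.toString.call(obj) === '[object ' + clsname + ']';
}

function assert(cond, msg) {
  if (!cond) throw new Error(msg);
}

function assertArray2D(array) {
  assert(checkClass(array, 'Array'), 'The object should be an array');
  assert(checkClass(array[0], 'Array'), 'The object should be an array of array');
  return [array.length, array[0].length];
}

console.log(assertArray2D([[1, 2], [3, 4]])); // [2, 2] -- previously this threw
// assertArray2D([1, 2, 3]); // throws, as it should: rows are not arrays
```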
