-
Notifications
You must be signed in to change notification settings - Fork 87
/
MixtureOfGaussians.swift
488 lines (447 loc) · 21.7 KB
/
MixtureOfGaussians.swift
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
//
// MixtureOfGaussians.swift
// AIToolbox
//
// Created by Kevin Coble on 4/12/16.
// Copyright © 2016 Kevin Coble. All rights reserved.
//
import Foundation
import Accelerate
///  Errors specific to mixture-of-gaussians training (beyond the shared MachineLearningError cases)
public enum MixtureOfGaussianError: Error {
    ///  Thrown when the KMeans pass used to initialize the gaussians fails
    case kMeansFailed
}
/// Class for mixture-of-gaussians density function learned from data
/// model is α₁𝒩(μ₁, Σ₁) + α₂𝒩(μ₂, Σ₂) + α₃𝒩(μ₃, Σ₃) + ...
/// Output is a single Double value that is the probability for the input vector
open class MixtureOfGaussians : Regressor
{
    //  Dimension of the input vectors the model accepts
    open fileprivate(set) var inputDimension : Int
    //  Number of gaussian terms in the mixture
    open fileprivate(set) var termCount : Int
    //  If true, covariance matrices are restricted to the diagonal (forced spherical during initialization)
    open fileprivate(set) var diagonalΣ : Bool
    //  Single-variable gaussians, used when inputDimension == 1
    open fileprivate(set) var gaussians : [Gaussian] = []
    //  Multivariate gaussians, used when inputDimension > 1
    open fileprivate(set) var mvgaussians : [MultivariateGaussian] = []
    //  Mixture weights, one per term
    open var α : [Double]
    open var initWithKMeans = true      //  if true initial probability distributions are computed with kmeans of training data, else random
    open var convergenceLimit = 0.0000001       //  EM iteration stops when |Δ log-likelihood| drops below this
    var initializeFunction : ((_ trainData: MLDataSet)->[Double])!

    ///  Creates an untrained mixture-of-gaussians model
    ///  - Parameters:
    ///    - inputSize: dimension of the input vectors
    ///    - numberOfTerms: number of gaussian terms in the mixture
    ///    - diagonalCoVariance: if true, covariance matrices are kept diagonal
    ///  - Throws: any error raised while constructing the component gaussians
    public init(inputSize: Int, numberOfTerms: Int, diagonalCoVariance: Bool) throws
    {
        inputDimension = inputSize
        termCount = numberOfTerms
        diagonalΣ = diagonalCoVariance

        //  Create one gaussian per term, starting with unit variance
        for _ in 0..<termCount {
            if (inputDimension == 1) {
                try gaussians.append(Gaussian(mean: 0.0, variance: 1.0))
            }
            else {
                try mvgaussians.append(MultivariateGaussian(dimension: inputDimension, diagonalCovariance: diagonalΣ))
            }
        }

        α = [Double](repeating: 1.0, count: termCount)
    }

    open func getInputDimension() -> Int
    {
        return inputDimension
    }

    open func getOutputDimension() -> Int
    {
        //  The model produces a single probability value
        return 1
    }

    ///  Sets all model parameters from a flat array laid out as, for each term:
    ///  α, then that term's gaussian parameters
    ///  - Throws: MachineLearningError.notEnoughData if the array is shorter than getParameterDimension()
    open func setParameters(_ parameters: [Double]) throws
    {
        if (parameters.count < getParameterDimension()) { throw MachineLearningError.notEnoughData }

        var offset = 0
        for index in 0..<termCount {
            α[index] = parameters[offset]
            offset += 1
            if (inputDimension == 1) {
                try gaussians[index].setParameters(Array(parameters[offset..<offset+2]))
                offset += 2
            }
            else {
                let neededSize = mvgaussians[index].getParameterDimension()
                try mvgaussians[index].setParameters(Array(parameters[offset..<offset+neededSize]))
                offset += neededSize
            }
        }
    }

    open func getParameterDimension() -> Int
    {
        var parameterCount = 1 + inputDimension     //  alpha and mean
        if (diagonalΣ) {
            parameterCount += inputDimension        //  If diagonal, only one covariance term per input
        }
        else {
            parameterCount += inputDimension * inputDimension       //  If not diagonal, full matrix of covariance
        }
        parameterCount *= termCount     //  alpha, mean, and covariance for each gaussian term
        return parameterCount
    }

    ///  A custom initializer for Mixture-of-Gaussians must assign the training data to classes [the 'classes' member of the dataset]
    open func setCustomInitializer(_ function: ((_ trainData: MLDataSet)->[Double])!) {
        initializeFunction = function
    }

    ///  Returns the model parameters using the same layout that setParameters consumes:
    ///  for each term - α followed by that term's gaussian parameters
    open func getParameters() throws -> [Double]
    {
        var parameters : [Double] = []
        for index in 0..<termCount {
            parameters.append(α[index])     //  Fix: include α so the result round-trips through setParameters
            if (inputDimension == 1) {
                parameters += try gaussians[index].getParameters()
            }
            else {
                parameters += try mvgaussians[index].getParameters()
            }
        }
        return parameters
    }

    ///  Function to calculate the parameters of the model.
    ///  Initializes the gaussians (KMeans, custom initializer, or random assignment),
    ///  then runs EM via continueTrainingRegressor until convergence
    ///  - Throws: MachineLearningError for invalid data, MixtureOfGaussianError.kMeansFailed if initialization fails
    open func trainRegressor(_ trainData: MLRegressionDataSet) throws
    {
        //  Verify that the data is regression data of the expected dimensions
        if (trainData.dataType != .regression) { throw MachineLearningError.dataNotRegression }
        if (trainData.inputDimension != inputDimension) { throw MachineLearningError.dataWrongDimension }
        if (trainData.outputDimension != 1) { throw MachineLearningError.dataWrongDimension }
        if (trainData.size < termCount) { throw MachineLearningError.notEnoughData }

        //  Initialize the gaussians and weights
        //  Algorithm from http://arxiv.org/pdf/1312.5946.pdf
        var centroids : [[Double]] = []
        for _ in 0..<termCount {
            centroids.append([Double](repeating: 0.0, count: inputDimension))
        }

        //  Make a classification data set from the regression set
        if let classificationDataSet = DataSet(dataType : .classification, withInputsFrom: trainData) {
            if (initWithKMeans) {
                //  Group the points using KMeans
                let kmeans = KMeans(classes: termCount)
                do {
                    try kmeans.train(classificationDataSet)
                }
                catch {
                    throw MixtureOfGaussianError.kMeansFailed
                }
                centroids = kmeans.centroids
            }
            else {
                //  Assign each point to a term
                if let initFunc = initializeFunction {
                    //  If a custom initializer has been provided, use it to assign the initial classes
                    //  Fix: pass the classification set (whose classes are read back below), not the regression set
                    _ = initFunc(classificationDataSet)
                }
                else {
                    //  No initializer, assign to random classes
                    for index in 0..<termCount {        //  Assign first few points to each class to guarantee at least one point per term
                        try classificationDataSet.setClass(index, newClass: index)
                    }
                    for index in termCount..<trainData.size {
                        try classificationDataSet.setClass(index, newClass: Int(arc4random_uniform(UInt32(termCount))))
                    }
                }

                //  Calculate centroids as the mean of each class' points
                var counts = [Int](repeating: 0, count: termCount)
                for point in 0..<trainData.size {
                    let pointClass = try classificationDataSet.getClass(point)
                    let inputs = try trainData.getInput(point)
                    counts[pointClass] += 1
                    let ptr = UnsafeMutablePointer<Double>(mutating: centroids[pointClass])     //  Swift bug! can't put this in line!
                    vDSP_vaddD(inputs, 1, centroids[pointClass], 1, ptr, 1, vDSP_Length(inputDimension))
                }
                for term in 0..<termCount {
                    if (counts[term] > 0) {     //  Guard against an empty class from a custom initializer (divide by zero)
                        var inverse = 1.0 / Double(counts[term])
                        let ptr = UnsafeMutablePointer<Double>(mutating: centroids[term])       //  Swift bug! can't put this in line!
                        //  Fix: a centroid has inputDimension entries - the old inputDimension² length ran off the end of the buffer
                        vDSP_vsmulD(centroids[term], 1, &inverse, ptr, 1, vDSP_Length(inputDimension))
                    }
                }
            }

            //  Get the counts and sum the covariance terms
            var counts = [Int](repeating: 0, count: termCount)
            var covariance : [[Double]] = []
            //  Fix: one spherical accumulator per TERM (indexed by class below), not per input dimension
            var sphericalCovariance = [Double](repeating: 0.0, count: termCount)
            for _ in 0..<termCount { covariance.append([Double](repeating: 0.0, count: inputDimension * inputDimension))}
            var offset = [Double](repeating: 0.0, count: inputDimension)
            var matrix = [Double](repeating: 0.0, count: inputDimension * inputDimension)
            for point in 0..<trainData.size {
                //  Count
                let pointClass = try classificationDataSet.getClass(point)
                let inputs = try trainData.getInput(point)
                counts[pointClass] += 1

                //  Get the distance from the mean
                vDSP_vsubD(centroids[pointClass], 1, inputs, 1, &offset, 1, vDSP_Length(inputDimension))

                if (!diagonalΣ) {       //  We will force into spherical if diagonal covariance
                    //  Multiply into covariance matrix (outer product of the offset with itself) and sum
                    vDSP_mmulD(offset, 1, offset, 1, &matrix, 1, vDSP_Length(inputDimension), vDSP_Length(inputDimension), vDSP_Length(1))
                    let ptr = UnsafeMutablePointer<Double>(mutating: covariance[pointClass])        //  Swift bug! can't put this in line!
                    vDSP_vaddD(matrix, 1, covariance[pointClass], 1, ptr, 1, vDSP_Length(inputDimension * inputDimension))
                }

                //  Get dot product and sum into spherical in case covariance matrix is not positive definite (dot product of offset is distance squared)
                var dotProduct = 0.0
                vDSP_dotprD(offset, 1, offset, 1, &dotProduct, vDSP_Length(inputDimension))
                sphericalCovariance[pointClass] += dotProduct
            }

            //  If multivariate, verify positive-definite covariance matrix, or do same processing for diagonal covariance
            if (inputDimension > 1 || diagonalΣ) {
                var nonSPD = false
                if (!diagonalΣ) {
                    //  If not diagonal, check each matrix for positive-definiteness via Cholesky factorization
                    for term in 0..<termCount {
                        var uplo : Int8 = Int8(UnicodeScalar("U").value)        //  use upper triangle
                        var A = covariance[term]        //  Make a copy so it isn't mangled
                        var n : __CLPK_integer = __CLPK_integer(inputDimension)
                        var info : __CLPK_integer = 0
                        dpotrf_(&uplo, &n, &A, &n, &info)
                        if (info != 0) {
                            nonSPD = true
                            break
                        }
                    }
                }

                //  If not positive-definite (or diagonal covariance), use spherical covariance
                if (nonSPD || diagonalΣ) {
                    //  Set each covariance matrix to the identity matrix times the sum of dot products of distances
                    for term in 0..<termCount {
                        covariance[term] = [Double](repeating: 0.0, count: inputDimension * inputDimension)
                        let scale = 1.0 / Double(inputDimension)
                        for row in 0..<inputDimension {
                            covariance[term][row * inputDimension + row] = sphericalCovariance[term] * scale
                        }
                    }
                }
                //  The stated algorithm continues with another positive-definite check, but a positive constant times the identity matrix should always be positive definite, so I am stopping here
            }

            //  Assign the values to the gaussians
            for term in 0..<termCount {
                α[term] = Double(counts[term]) / Double(trainData.size)
                if (counts[term] > 1) {
                    var inverse = 1.0 / Double(counts[term])
                    let ptr = UnsafeMutablePointer<Double>(mutating: covariance[term])      //  Swift bug! can't put this in line!
                    vDSP_vsmulD(covariance[term], 1, &inverse, ptr, 1, vDSP_Length(inputDimension * inputDimension))
                }
                if (inputDimension == 1) {
                    gaussians[term].setMean(centroids[term][0])
                    gaussians[term].setVariance(covariance[term][0])
                }
                else {
                    try mvgaussians[term].setMean(centroids[term])
                    if (diagonalΣ) {
                        //  Pass only the diagonal entries when the covariance is restricted to the diagonal
                        var diagonalTerms : [Double] = []
                        for row in 0..<inputDimension {
                            diagonalTerms.append(covariance[term][row * inputDimension + row])
                        }
                        try mvgaussians[term].setCovarianceMatrix(diagonalTerms)
                    }
                    else {
                        try mvgaussians[term].setCovarianceMatrix(covariance[term])
                    }
                }
            }
        }

        //  Use the continue function to converge the model
        try continueTrainingRegressor(trainData)
    }

    ///  Function to continue calculating the parameters of the model with more data, without initializing parameters.
    ///  Runs the EM algorithm until the change in log-likelihood drops below convergenceLimit
    open func continueTrainingRegressor(_ trainData: MLRegressionDataSet) throws
    {
        //  Verify that the data is regression data of the expected dimensions
        if (trainData.dataType != DataSetType.regression) { throw MachineLearningError.dataNotRegression }
        if (trainData.inputDimension != inputDimension) { throw MachineLearningError.dataWrongDimension }
        if (trainData.outputDimension != 1) { throw MachineLearningError.dataWrongDimension }

        //  Calculate the log-likelihood with the current parameters for the convergence check
        var lastLogLikelihood = 0.0
        for point in 0..<trainData.size {
            let inputs = try trainData.getInput(point)
            let sum = try predictOne(inputs)
            lastLogLikelihood += log(sum[0])
        }

        //  Use the EM algorithm until it converges
        var membershipWeights : [[Double]]
        var centroids : [[Double]] = []
        var matrix = [Double](repeating: 0.0, count: inputDimension * inputDimension)
        var difference = convergenceLimit + 1.0
        while (difference > convergenceLimit) {     //  Go till convergence
            difference = 0

            //  'E' step - compute each point's membership weight in each term
            membershipWeights = []
            for _ in 0..<termCount {
                membershipWeights.append([Double](repeating: 0.0, count: trainData.size))
            }
            var total : Double
            var probability : Double
            for point in 0..<trainData.size {
                total = 0.0
                for term in 0..<termCount {
                    let inputs = try trainData.getInput(point)
                    if (inputDimension == 1) {
                        probability = α[term] * gaussians[term].getProbability(inputs[0])
                    }
                    else {
                        probability = try α[term] * mvgaussians[term].getProbability(inputs)
                    }
                    total += probability
                    membershipWeights[term][point] = probability
                }
                //  Normalize so each point's weights sum to 1
                let scale = 1.0 / total
                for term in 0..<termCount {
                    membershipWeights[term][point] *= scale
                }
            }

            //  'M' step
            //  Calculate α's and μ's
            var weightTotals = [Double](repeating: 0.0, count: termCount)
            var vector = [Double](repeating: 0.0, count: inputDimension)
            centroids = []
            for term in 0..<termCount {
                centroids.append([Double](repeating: 0.0, count: inputDimension))
                for point in 0..<trainData.size {
                    let inputs = try trainData.getInput(point)
                    weightTotals[term] += membershipWeights[term][point]
                    vDSP_vsmulD(inputs, 1, &membershipWeights[term][point], &vector, 1, vDSP_Length(inputDimension))
                    let ptr = UnsafeMutablePointer<Double>(mutating: centroids[term])       //  Swift bug! can't put this in line!
                    vDSP_vaddD(vector, 1, centroids[term], 1, ptr, 1, vDSP_Length(inputDimension))
                }
                α[term] = weightTotals[term] / Double(trainData.size)
                var scale = 1.0 / weightTotals[term]
                let ptr = UnsafeMutablePointer<Double>(mutating: centroids[term])       //  Swift bug! can't put this in line!
                vDSP_vsmulD(centroids[term], 1, &scale, ptr, 1, vDSP_Length(inputDimension))
                if (inputDimension == 1) {
                    gaussians[term].setMean(centroids[term][0])
                }
                else {
                    try mvgaussians[term].setMean(centroids[term])
                }
            }

            //  Calculate the covariance matrices
            if (diagonalΣ) {
                //  Diagonal covariance is forced spherical: identity times the weighted average distance-squared
                for term in 0..<termCount {
                    var sphericalCovarianceValue = 0.0
                    for point in 0..<trainData.size {
                        //  Get difference vector
                        let inputs = try trainData.getInput(point)
                        vDSP_vsubD(inputs, 1, centroids[term], 1, &vector, 1, vDSP_Length(inputDimension))
                        var dotProduct = 0.0
                        vDSP_dotprD(vector, 1, vector, 1, &dotProduct, vDSP_Length(inputDimension))
                        sphericalCovarianceValue += membershipWeights[term][point] * dotProduct
                    }
                    sphericalCovarianceValue /= (weightTotals[term] * Double(inputDimension))
                    let Σ = [Double](repeating: sphericalCovarianceValue, count: inputDimension)
                    if (inputDimension == 1) {
                        gaussians[term].setVariance(Σ[0])
                    }
                    else {
                        try mvgaussians[term].setCovarianceMatrix(Σ)
                    }
                }
            }
            else {
                for term in 0..<termCount {
                    var Σ = [Double](repeating: 0.0, count: inputDimension * inputDimension)
                    for point in 0..<trainData.size {
                        //  Get difference vector
                        let inputs = try trainData.getInput(point)
                        vDSP_vsubD(inputs, 1, centroids[term], 1, &vector, 1, vDSP_Length(inputDimension))
                        //  Multiply by transpose to get covariance factor
                        vDSP_mmulD(vector, 1, vector, 1, &matrix, 1, vDSP_Length(inputDimension), vDSP_Length(inputDimension), vDSP_Length(1))
                        //  Weight by the point's membership
                        vDSP_vsmulD(matrix, 1, &membershipWeights[term][point], &matrix, 1, vDSP_Length(inputDimension * inputDimension))
                        //  Accumulate
                        vDSP_vaddD(matrix, 1, Σ, 1, &Σ, 1, vDSP_Length(inputDimension * inputDimension))
                    }
                    //  Fix: divide by the total weight ONCE, after accumulation - the old code rescaled the
                    //  running sum every iteration, repeatedly shrinking earlier contributions
                    var scale = 1.0 / weightTotals[term]
                    vDSP_vsmulD(Σ, 1, &scale, &Σ, 1, vDSP_Length(inputDimension * inputDimension))
                    if (inputDimension == 1) {
                        gaussians[term].setVariance(Σ[0])
                    }
                    else {
                        try mvgaussians[term].setCovarianceMatrix(Σ)
                    }
                }
            }

            //  Calculate the log-likelihood to see if we have converged
            var logLikelihood = 0.0
            for point in 0..<trainData.size {
                let inputs = try trainData.getInput(point)
                let sum = try predictOne(inputs)
                logLikelihood += log(sum[0])
            }
            difference = fabs(lastLogLikelihood - logLikelihood)
            lastLogLikelihood = logLikelihood
        }
    }

    ///  Function to calculate the result from an input vector
    ///  - Returns: a single-element array containing Σᵢ αᵢ·𝒩ᵢ(inputs)
    ///  - Throws: MachineLearningError.dataWrongDimension if the input size doesn't match
    open func predictOne(_ inputs: [Double]) throws ->[Double]
    {
        if (inputs.count != inputDimension) { throw MachineLearningError.dataWrongDimension }

        var result = 0.0
        for index in 0..<termCount {
            if (inputDimension == 1) {
                result += α[index] * gaussians[index].getProbability(inputs[0])
            }
            else {
                result += try α[index] * mvgaussians[index].getProbability(inputs)
            }
        }
        return [result]
    }

    ///  Predicts on every point of the data set, storing the mixture probability as the output
    open func predict(_ testData: MLRegressionDataSet) throws
    {
        //  Verify the data set is the right type and dimension
        if (testData.dataType != .regression) { throw MachineLearningError.dataNotRegression }
        if (testData.inputDimension != inputDimension) { throw MachineLearningError.dataWrongDimension }

        //  predict on each input
        for index in 0..<testData.size {
            let inputs = try testData.getInput(index)
            try testData.setOutput(index, newOutput: predictOne(inputs))
        }
    }
}