-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathTestModelPerformance.html
More file actions
364 lines (314 loc) · 18 KB
/
Copy pathTestModelPerformance.html
File metadata and controls
364 lines (314 loc) · 18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
<!-- Generated by pkgdown: do not edit by hand -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Test the performance of spectral models — TestModelPerformance • waves</title>
<!-- favicons -->
<link rel="icon" type="image/png" sizes="16x16" href="../favicon-16x16.png">
<link rel="icon" type="image/png" sizes="32x32" href="../favicon-32x32.png">
<link rel="apple-touch-icon" type="image/png" sizes="180x180" href="../apple-touch-icon.png" />
<link rel="apple-touch-icon" type="image/png" sizes="120x120" href="../apple-touch-icon-120x120.png" />
<link rel="apple-touch-icon" type="image/png" sizes="76x76" href="../apple-touch-icon-76x76.png" />
<link rel="apple-touch-icon" type="image/png" sizes="60x60" href="../apple-touch-icon-60x60.png" />
<!-- jquery -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script>
<!-- Bootstrap -->
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha256-bZLfwXAP04zRMK2BjiO8iu9pf4FbLqX6zitd+tIvLhE=" crossorigin="anonymous" />
<script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script>
<!-- bootstrap-toc -->
<link rel="stylesheet" href="../bootstrap-toc.css">
<script src="../bootstrap-toc.js"></script>
<!-- Font Awesome icons -->
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous" />
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous" />
<!-- clipboard.js -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script>
<!-- headroom.js -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script>
<!-- pkgdown -->
<link href="../pkgdown.css" rel="stylesheet">
<script src="../pkgdown.js"></script>
<meta property="og:title" content="Test the performance of spectral models — TestModelPerformance" />
<meta property="og:description" content="Wrapper that trains models based on spectral data to predict
reference values and reports model performance statistics" />
<meta property="og:image" content="/logo.png" />
<!-- mathjax -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script>
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
</head>
<body data-spy="scroll" data-target="#toc">
<div class="container template-reference-topic">
<header>
<!-- Fixed top navigation bar: brand/version label, collapsible page links, and repository link.
     Icon-only links carry an aria-label so they have an accessible name; the icon glyphs
     themselves are decorative and hidden from assistive technology. -->
<div class="navbar navbar-default navbar-fixed-top" role="navigation">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">waves</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.1.1.9000</span>
</span>
</div>
<div id="navbar" class="navbar-collapse collapse">
<ul class="nav navbar-nav">
<li>
<a href="../index.html" aria-label="Home">
<span class="fas fa-home fa-lg" aria-hidden="true"></span>
</a>
</li>
<li>
<a href="../reference/index.html">Reference</a>
</li>
<li>
<a href="../news/index.html">Changelog</a>
</li>
</ul>
<ul class="nav navbar-nav navbar-right">
<li>
<a href="https://github.com/GoreLab/waves/" aria-label="waves on GitHub">
<span class="fab fa-github fa-lg" aria-hidden="true"></span>
</a>
</li>
</ul>
</div><!--/.nav-collapse -->
</div><!--/.container -->
</div><!--/.navbar -->
</header>
<div class="row">
<div class="col-md-9 contents">
<div class="page-header">
<h1>Test the performance of spectral models</h1>
<small class="dont-index">Source: <a href='https://github.com/GoreLab/waves/blob/master/R/TestModelPerformance.R'><code>R/TestModelPerformance.R</code></a></small>
<div class="hidden name"><code>TestModelPerformance.Rd</code></div>
</div>
<div class="ref-description">
<p>Wrapper that trains models based on spectral data to predict
reference values and reports model performance statistics</p>
</div>
<pre class="usage"><span class='fu'>TestModelPerformance</span><span class='op'>(</span>
<span class='va'>train.data</span>,
<span class='va'>num.iterations</span>,
test.data <span class='op'>=</span> <span class='cn'>NULL</span>,
preprocessing <span class='op'>=</span> <span class='cn'>TRUE</span>,
wavelengths <span class='op'>=</span> <span class='fl'>740</span><span class='op'>:</span><span class='fl'>1070</span>,
tune.length <span class='op'>=</span> <span class='fl'>50</span>,
model.method <span class='op'>=</span> <span class='st'>"pls"</span>,
output.summary <span class='op'>=</span> <span class='cn'>TRUE</span>,
rf.variable.importance <span class='op'>=</span> <span class='cn'>FALSE</span>,
stratified.sampling <span class='op'>=</span> <span class='cn'>TRUE</span>,
cv.scheme <span class='op'>=</span> <span class='cn'>NULL</span>,
trial1 <span class='op'>=</span> <span class='cn'>NULL</span>,
trial2 <span class='op'>=</span> <span class='cn'>NULL</span>,
trial3 <span class='op'>=</span> <span class='cn'>NULL</span>,
split.test <span class='op'>=</span> <span class='cn'>FALSE</span>,
verbose <span class='op'>=</span> <span class='cn'>TRUE</span>
<span class='op'>)</span></pre>
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
<table class="ref-arguments">
<colgroup><col class="name" /><col class="desc" /></colgroup>
<tr>
<th>train.data</th>
<td><p><code>data.frame</code> object of spectral data for input into a
spectral prediction model. First column contains unique identifiers, second
contains reference values, followed by spectral columns. Include no other
columns to right of spectra! Column names of spectra must start with "X"
and reference column must be named "reference".</p></td>
</tr>
<tr>
<th>num.iterations</th>
<td><p>Number of training iterations to perform</p></td>
</tr>
<tr>
<th>test.data</th>
<td><p><code>data.frame</code> with same specifications as <code>train.data</code>. Use
if specific test set is desired for hyperparameter tuning. If <code>NULL</code>,
function will automatically train with a stratified sample of 70%. Default
is <code>NULL</code>.</p></td>
</tr>
<tr>
<th>preprocessing</th>
<td><p>If <code>TRUE</code>, 12 preprocessing methods will be applied
and their performance analyzed. If <code>FALSE</code>, input data is analyzed as
is (raw). Default is <code>TRUE</code>.</p></td>
</tr>
<tr>
<th>wavelengths</th>
<td><p>List of wavelengths represented by each column in
<code>train.data</code></p></td>
</tr>
<tr>
<th>tune.length</th>
<td><p>Number delineating search space for tuning of the PLSR
hyperparameter <code>ncomp</code>. Default is 50.</p></td>
</tr>
<tr>
<th>model.method</th>
<td><p>Model type to use for training. Valid options include:</p><ul>
<li><p>"pls": Partial least squares regression (Default)</p></li>
<li><p>"rf": Random forest</p></li>
<li><p>"svmLinear": Support vector machine with linear
kernel</p></li>
<li><p>"svmRadial": Support vector machine with radial kernel</p></li>
</ul></td>
</tr>
<tr>
<th>output.summary</th>
<td><p>boolean that controls function output.</p><ul>
<li><p>If <code>TRUE</code>, a summary df will be output (1st row = means, 2nd row =
standard deviations). Default is <code>TRUE</code>.</p></li>
<li><p>If <code>FALSE</code>, entire
results data frame will be output</p></li>
</ul></td>
</tr>
<tr>
<th>rf.variable.importance</th>
<td><p>boolean that:</p><ul>
<li><p>If <code>TRUE</code>,
<code>model.method</code> must be set to "rf". Returns a list with a model
performance <code>data.frame</code> and a second <code>data.frame</code> with variable
importance values for each wavelength for each training iteration. If
<code>return.model</code> is also <code>TRUE</code>, returns list of three elements
with trained model first, model performance second, and variable importance
last. Dimensions are <code>nrow = num.iterations</code>, <code>ncol =
length(wavelengths)</code>.</p></li>
<li><p>If <code>FALSE</code>, no variable importance is
returned. Default is <code>FALSE</code>.</p></li>
</ul></td>
</tr>
<tr>
<th>stratified.sampling</th>
<td><p>If <code>TRUE</code>, training and test sets will be
selected using stratified random sampling. This term is only used if
<code>test.data == NULL</code>. Default is <code>TRUE</code>.</p></td>
</tr>
<tr>
<th>cv.scheme</th>
<td><p>A cross validation (CV) scheme from Jarquín et al., 2017.
Options for cv.scheme include:</p><ul>
<li><p>"CV1": untested lines in tested environments</p></li>
<li><p>"CV2": tested lines in tested environments</p></li>
<li><p>"CV0": tested lines in untested environments</p></li>
<li><p>"CV00": untested lines in untested environments</p></li>
</ul></td>
</tr>
<tr>
<th>trial1</th>
<td><p><code>data.frame</code> object that is for use only when
<code>cv.scheme</code> is provided. Contains the trial to be tested in subsequent
model training functions. The first column contains unique identifiers,
second contains genotypes, third contains reference values, followed by
spectral columns. Include no other columns to right of spectra! Column
names of spectra must start with "X", reference column must be named
"reference", and genotype column must be named "genotype".</p></td>
</tr>
<tr>
<th>trial2</th>
<td><p><code>data.frame</code> object that is for use only when
<code>cv.scheme</code> is provided. This data.frame contains a trial that has
overlapping genotypes with <code>trial1</code> but that were grown in a different
site/year (different environment). Formatting must be consistent with
<code>trial1</code>.</p></td>
</tr>
<tr>
<th>trial3</th>
<td><p><code>data.frame</code> object that is for use only when
<code>cv.scheme</code> is provided. This data.frame contains a trial that may or
may not contain genotypes that overlap with <code>trial1</code>. Formatting must
be consistent with <code>trial1</code>.</p></td>
</tr>
<tr>
<th>split.test</th>
<td><p>boolean that allows for a fixed training set and a split
test set. For example: train model on data from two breeding programs and a
stratified subset (70%) of a third and test on the remaining samples
(30%) of the third. If <code>FALSE</code>, the entire provided test set
<code>test.data</code> will remain as a testing set or if none is provided, 30%
of the provided <code>train.data</code> will be used for testing. Default is
<code>FALSE</code>.</p></td>
</tr>
<tr>
<th>verbose</th>
<td><p>If <code>TRUE</code>, the number of rows removed through filtering
will be printed to the console. Default is <code>TRUE</code>.</p></td>
</tr>
</table>
<h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
<p><code>data.frame</code> with model performance statistics in summary format
(2 rows, one with mean and one with standard deviation of all training
iterations) or in long format (number of rows = num.iterations).
<strong>Note</strong> if <code>preprocessing = TRUE</code>, only the mean of each
summary statistic across all training iterations is provided for each
technique.
Included summary statistics:</p><ul>
<li><p>Tuned parameters depending on the model algorithm:</p><ul>
<li><p><strong>best.ncomp</strong>, the best number of components</p></li>
<li><p><strong>best.ntree</strong>, the best number of trees in an RF model</p></li>
<li><p><strong>best.mtry</strong>, the best number of variables to include at every decision point in an RF model</p></li>
</ul></li>
<li><p><strong>RMSEcv</strong>, the root mean squared error of cross-validation</p></li>
<li><p><strong>R2cv</strong>, the coefficient of multiple determination of cross-validation for PLSR models</p></li>
<li><p><strong>RMSEp</strong>, the root mean squared error of prediction</p></li>
<li><p><strong>R2p</strong>, the squared Pearson’s correlation between predicted and observed test set values</p></li>
<li><p><strong>RPD</strong>, the ratio of standard deviation of observed test set values to RMSEP</p></li>
<li><p><strong>RPIQ</strong>, the ratio of performance to interquartile difference</p></li>
<li><p><strong>CCC</strong>, the concordance correlation coefficient</p></li>
<li><p><strong>Bias</strong>, the average difference between the predicted and observed values</p></li>
<li><p><strong>SEP</strong>, the standard error of prediction</p></li>
<li><p><strong>R2sp</strong>, the squared Spearman’s rank correlation between predicted and observed test set values</p></li>
</ul>
<h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>
<p>Calls <code><a href='DoPreprocessing.html'>DoPreprocessing</a></code>, <code><a href='FormatCV.html'>FormatCV</a></code>,
and <code><a href='TrainSpectralModel.html'>TrainSpectralModel</a></code> functions.</p>
<h2 class="hasAnchor" id="author"><a class="anchor" href="#author"></a>Author</h2>
<p>Jenna Hershberger</p>
<h2 class="hasAnchor" id="examples"><a class="anchor" href="#examples"></a>Examples</h2>
<pre class="examples"><div class='input'><span class='co'># \donttest{</span>
<span class='kw'><a href='https://rdrr.io/r/base/library.html'>library</a></span><span class='op'>(</span><span class='va'><a href='https://magrittr.tidyverse.org'>magrittr</a></span><span class='op'>)</span>
<span class='va'>ikeogu.2017</span> <span class='op'>%>%</span>
<span class='fu'>dplyr</span><span class='fu'>::</span><span class='fu'><a href='https://dplyr.tidyverse.org/reference/rename.html'>rename</a></span><span class='op'>(</span>reference <span class='op'>=</span> <span class='va'>DMC.oven</span><span class='op'>)</span> <span class='op'>%>%</span>
<span class='fu'>dplyr</span><span class='fu'>::</span><span class='fu'><a href='https://dplyr.tidyverse.org/reference/rename.html'>rename</a></span><span class='op'>(</span>unique.id <span class='op'>=</span> <span class='va'>sample.id</span><span class='op'>)</span> <span class='op'>%>%</span>
<span class='fu'>dplyr</span><span class='fu'>::</span><span class='fu'><a href='https://dplyr.tidyverse.org/reference/select.html'>select</a></span><span class='op'>(</span><span class='va'>unique.id</span>, <span class='va'>reference</span>, <span class='fu'>dplyr</span><span class='fu'>::</span><span class='fu'><a href='https://tidyselect.r-lib.org/reference/starts_with.html'>starts_with</a></span><span class='op'>(</span><span class='st'>"X"</span><span class='op'>)</span><span class='op'>)</span> <span class='op'>%>%</span>
<span class='fu'><a href='https://rdrr.io/r/stats/na.fail.html'>na.omit</a></span><span class='op'>(</span><span class='op'>)</span> <span class='op'>%>%</span>
<span class='fu'>TestModelPerformance</span><span class='op'>(</span>train.data <span class='op'>=</span> <span class='va'>.</span>,
tune.length <span class='op'>=</span> <span class='fl'>3</span>,
num.iterations <span class='op'>=</span> <span class='fl'>3</span>,
preprocessing <span class='op'>=</span> <span class='cn'>FALSE</span>,
wavelengths <span class='op'>=</span> <span class='fl'>350</span><span class='op'>:</span><span class='fl'>2500</span><span class='op'>)</span>
</div><div class='output co'>#> Preprocessing skipped.
#> Training model...</div><div class='output co'>#> Summary_type RMSEp R2p RPD RPIQ CCC Bias
#> 1 mean 2.1167852 0.75288809 1.95105646 2.5489867 0.851316240 0.3601674
#> 2 sd 0.2200048 0.02925815 0.09030978 0.2419144 0.009943066 0.2981776
#> SEP RMSEcv R2cv R2sp best.ncomp
#> 1 2.1387213 1.9679886 0.77622197 0.67944538 3
#> 2 0.2222847 0.1075555 0.01506289 0.07185676 NA</div><div class='input'><span class='co'># }</span>
</div></pre>
</div>
<div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar">
<nav id="toc" data-toggle="toc" class="sticky-top">
<h2 data-toc-skip>Contents</h2>
</nav>
</div>
</div>
<footer>
<div class="copyright">
<p>Developed by Jenna Hershberger, NSF BREAD IOS-1543958.</p>
</div>
<div class="pkgdown">
<p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.6.1.</p>
</div>
</footer>
</div>
</body>
</html>