pktools 2.6.7
Processing Kernel for geospatial data
pksvmogr.cc
1/**********************************************************************
2pksvmogr.cc: classify vector dataset using Support Vector Machine
3Copyright (C) 2008-2017 Pieter Kempeneers
4
5This file is part of pktools
6
7pktools is free software: you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation, either version 3 of the License, or
10(at your option) any later version.
11
12pktools is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with pktools. If not, see <http://www.gnu.org/licenses/>.
19***********************************************************************/
20#include <stdlib.h>
21#include <vector>
22#include <map>
23#include <algorithm>
24#include "imageclasses/ImgReaderGdal.h"
25#include "imageclasses/ImgWriterGdal.h"
26#include "imageclasses/ImgReaderOgr.h"
27#include "imageclasses/ImgWriterOgr.h"
28#include "base/Optionpk.h"
29#include "base/PosValue.h"
30#include "algorithms/ConfusionMatrix.h"
31#include "algorithms/svm.h"
32
33#ifdef HAVE_CONFIG_H
34#include <config.h>
35#endif
36/******************************************************************************/
114namespace svm{
115 enum SVM_TYPE {C_SVC=0, nu_SVC=1,one_class=2, epsilon_SVR=3, nu_SVR=4};
116 enum KERNEL_TYPE {linear=0,polynomial=1,radial=2,sigmoid=3};
117}
118
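//convenience macro for the C-style allocations expected by the libsvm API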
119#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
120
121using namespace std;
122
123int main(int argc, char *argv[])
124{
125 vector<double> priors;
126
127 //--------------------------- command line options ------------------------------------
128 Optionpk<string> input_opt("i", "input", "Input vector dataset to classify");
129 Optionpk<string> training_opt("t", "training", "Training vector file. A single vector file contains all training features (must be set as: b0, b1, b2,...) for all classes (class numbers identified by label option). Use multiple training files for bootstrap aggregation (alternative to the bag and bsize options, where a random subset is taken from a single training file)");
130 Optionpk<string> tlayer_opt("tln", "tln", "Training layer name(s)");
131 Optionpk<string> label_opt("label", "label", "Attribute name for class label in training vector file.","label");
132 Optionpk<unsigned int> balance_opt("bal", "balance", "Balance the input data to this number of samples for each class", 0);
133 Optionpk<bool> random_opt("random", "random", "Randomize training data for balancing and bagging", true, 2);
134 Optionpk<int> minSize_opt("min", "min", "If number of training pixels is less then min, do not take this class into account (0: consider all classes)", 0);
135 Optionpk<unsigned short> band_opt("b", "band", "Band index (starting from 0, either use band option or use start to end)");
136 Optionpk<unsigned short> bstart_opt("sband", "startband", "Start band sequence number");
137 Optionpk<unsigned short> bend_opt("eband", "endband", "End band sequence number");
138 Optionpk<double> offset_opt("offset", "offset", "Offset value for each spectral band input features: refl[band]=(DN[band]-offset[band])/scale[band]", 0.0);
139 Optionpk<double> scale_opt("scale", "scale", "Scale value for each spectral band input features: refl=(DN[band]-offset[band])/scale[band] (use 0 to scale min and max in each band to -1.0 and 1.0)", 0.0);
140 Optionpk<double> priors_opt("prior", "prior", "Prior probabilities for each class (e.g., --prior 0.3 --prior 0.3 --prior 0.2). Used for input only (ignored for cross validation)", 0.0);
141 Optionpk<string> priorimg_opt("pim", "priorimg", "Prior probability image (multi-band img with band for each class)","",2);
142 Optionpk<unsigned short> cv_opt("cv", "cv", "N-fold cross validation mode",0);
143 Optionpk<string> cmformat_opt("cmf","cmf","Format for confusion matrix (ascii or latex)","ascii");
144 Optionpk<std::string> svm_type_opt("svmt", "svmtype", "Type of SVM (C_SVC, nu_SVC, one_class, epsilon_SVR, nu_SVR)","C_SVC");
145 Optionpk<std::string> kernel_type_opt("kt", "kerneltype", "Type of kernel function (linear, polynomial, radial, sigmoid)","radial");
146 Optionpk<unsigned short> kernel_degree_opt("kd", "kd", "Degree in kernel function",3);
147 Optionpk<float> gamma_opt("g", "gamma", "Gamma in kernel function",1.0);
148 Optionpk<float> coef0_opt("c0", "coef0", "Coef0 in kernel function",0);
149 Optionpk<float> ccost_opt("cc", "ccost", "The parameter C of C_SVC, epsilon_SVR, and nu_SVR",1000);
150 Optionpk<float> nu_opt("nu", "nu", "The parameter nu of nu_SVC, one_class SVM, and nu_SVR",0.5);
151 Optionpk<float> epsilon_loss_opt("eloss", "eloss", "The epsilon in loss function of epsilon_SVR",0.1);
152 Optionpk<int> cache_opt("cache", "cache", "Cache memory size in MB",100);
153 Optionpk<float> epsilon_tol_opt("etol", "etol", "The tolerance of termination criterion",0.001);
154 Optionpk<bool> shrinking_opt("shrink", "shrink", "Whether to use the shrinking heuristics",false);
155 Optionpk<bool> prob_est_opt("pe", "probest", "Whether to train a SVC or SVR model for probability estimates",true,2);
156 // Optionpk<bool> weight_opt("wi", "wi", "Set the parameter C of class i to weight*C, for C_SVC",true);
157 Optionpk<unsigned short> comb_opt("comb", "comb", "How to combine bootstrap aggregation classifiers (0: sum rule, 1: product rule, 2: max rule). Also used to aggregate classes with rc option.",0);
158 Optionpk<unsigned short> bag_opt("bag", "bag", "Number of bootstrap aggregations", 1);
159 Optionpk<int> bagSize_opt("bagsize", "bagsize", "Percentage of features used from available training features for each bootstrap aggregation (one size for all classes, or a different size for each class respectively)", 100);
160 Optionpk<string> classBag_opt("cb", "classbag", "Output for each individual bootstrap aggregation");
161 Optionpk<string> mask_opt("m", "mask", "Only classify within specified mask (vector or raster).");
162 Optionpk<unsigned short> nodata_opt("nodata", "nodata", "Nodata value to put where vector dataset is masked as nodata", 0);
163 Optionpk<string> output_opt("o", "output", "Output vector dataset");
164 Optionpk<string> option_opt("co", "co", "Creation option for output file. Multiple options can be specified.");
165 Optionpk<string> colorTable_opt("ct", "ct", "Color table in ASCII format having 5 columns: id R G B ALFA (0: transparent, 255: solid)");
166 Optionpk<string> entropy_opt("entropy", "entropy", "Entropy image (measure for uncertainty of classifier output)","",2);
167 Optionpk<string> active_opt("active", "active", "Ogr output for active training sample.","",2);
168 Optionpk<string> ogrformat_opt("f", "f", "Output ogr format for active training sample","SQLite");
169 Optionpk<unsigned int> nactive_opt("na", "nactive", "Number of active training points",1);
170 Optionpk<string> classname_opt("c", "class", "List of class names.");
171 Optionpk<short> classvalue_opt("r", "reclass", "List of class values (use same order as in class opt).");
172 Optionpk<short> verbose_opt("v", "verbose", "Verbose level",0,2);
173
174 option_opt.setHide(1);
175 band_opt.setHide(1);
176 bstart_opt.setHide(1);
177 bend_opt.setHide(1);
178 balance_opt.setHide(1);
179 minSize_opt.setHide(1);
180 bag_opt.setHide(1);
181 bagSize_opt.setHide(1);
182 comb_opt.setHide(1);
183 classBag_opt.setHide(1);
184 priorimg_opt.setHide(1);
185 offset_opt.setHide(1);
186 scale_opt.setHide(1);
187 svm_type_opt.setHide(1);
188 kernel_type_opt.setHide(1);
189 kernel_degree_opt.setHide(1);
190 coef0_opt.setHide(1);
191 nu_opt.setHide(1);
192 epsilon_loss_opt.setHide(1);
193 cache_opt.setHide(1);
194 epsilon_tol_opt.setHide(1);
195 shrinking_opt.setHide(1);
196 prob_est_opt.setHide(1);
197 entropy_opt.setHide(1);
198 active_opt.setHide(1);
199 nactive_opt.setHide(1);
200 random_opt.setHide(1);
201
202 verbose_opt.setHide(2);
203
204 bool doProcess;//stop process when program was invoked with help option (-h --help)
205 try{
206 doProcess=training_opt.retrieveOption(argc,argv);
207 input_opt.retrieveOption(argc,argv);
208 output_opt.retrieveOption(argc,argv);
209 cv_opt.retrieveOption(argc,argv);
210 cmformat_opt.retrieveOption(argc,argv);
211 tlayer_opt.retrieveOption(argc,argv);
212 classname_opt.retrieveOption(argc,argv);
213 classvalue_opt.retrieveOption(argc,argv);
214 ogrformat_opt.retrieveOption(argc,argv);
215 option_opt.retrieveOption(argc,argv);
216 colorTable_opt.retrieveOption(argc,argv);
217 label_opt.retrieveOption(argc,argv);
218 priors_opt.retrieveOption(argc,argv);
219 gamma_opt.retrieveOption(argc,argv);
220 ccost_opt.retrieveOption(argc,argv);
221 mask_opt.retrieveOption(argc,argv);
222 nodata_opt.retrieveOption(argc,argv);
223 // Advanced options
224 band_opt.retrieveOption(argc,argv);
225 bstart_opt.retrieveOption(argc,argv);
226 bend_opt.retrieveOption(argc,argv);
227 balance_opt.retrieveOption(argc,argv);
228 minSize_opt.retrieveOption(argc,argv);
229 bag_opt.retrieveOption(argc,argv);
230 bagSize_opt.retrieveOption(argc,argv);
231 comb_opt.retrieveOption(argc,argv);
232 classBag_opt.retrieveOption(argc,argv);
233 priorimg_opt.retrieveOption(argc,argv);
234 offset_opt.retrieveOption(argc,argv);
235 scale_opt.retrieveOption(argc,argv);
236 svm_type_opt.retrieveOption(argc,argv);
237 kernel_type_opt.retrieveOption(argc,argv);
238 kernel_degree_opt.retrieveOption(argc,argv);
239 coef0_opt.retrieveOption(argc,argv);
240 nu_opt.retrieveOption(argc,argv);
241 epsilon_loss_opt.retrieveOption(argc,argv);
242 cache_opt.retrieveOption(argc,argv);
243 epsilon_tol_opt.retrieveOption(argc,argv);
244 shrinking_opt.retrieveOption(argc,argv);
245 prob_est_opt.retrieveOption(argc,argv);
246 entropy_opt.retrieveOption(argc,argv);
247 active_opt.retrieveOption(argc,argv);
248 nactive_opt.retrieveOption(argc,argv);
249 verbose_opt.retrieveOption(argc,argv);
250 random_opt.retrieveOption(argc,argv);
251 }
252 catch(string predefinedString){
253 std::cout << predefinedString << std::endl;
254 exit(0);
255 }
256 if(!doProcess){
257 cout << endl;
258 cout << "Usage: pksvmogr -t training [-i input -o output] [-cv value]" << endl;
259 cout << endl;
260 std::cout << "short option -h shows basic options only, use long option --help to show all options" << std::endl;
261 exit(0);//help was invoked, stop processing
262 }
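//A typical invocation (a sketch; the file names are assumptions):
//  pksvmogr -t training.sqlite -i input.sqlite -o classified.sqlite
//  pksvmogr -t training.sqlite -cv 2   (cross validation on the training data only)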
263
264 if(entropy_opt[0]=="")
265 entropy_opt.clear();
266 if(active_opt[0]=="")
267 active_opt.clear();
268 if(priorimg_opt[0]=="")
269 priorimg_opt.clear();
270
271
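//map the command-line strings for SVM type and kernel type onto the corresponding enum values (matching the libsvm constants)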
272 std::map<std::string, svm::SVM_TYPE> svmMap;
273
274 svmMap["C_SVC"]=svm::C_SVC;
275 svmMap["nu_SVC"]=svm::nu_SVC;
276 svmMap["one_class"]=svm::one_class;
277 svmMap["epsilon_SVR"]=svm::epsilon_SVR;
278 svmMap["nu_SVR"]=svm::nu_SVR;
279
280 std::map<std::string, svm::KERNEL_TYPE> kernelMap;
281
282 kernelMap["linear"]=svm::linear;
283 kernelMap["polynomial"]=svm::polynomial;
284 kernelMap["radial"]=svm::radial;
285 kernelMap["sigmoid"]=svm::sigmoid;
286
287 assert(training_opt.size());
288
289 if(verbose_opt[0]>=1){
290 if(input_opt.size())
291 std::cout << "input filename: " << input_opt[0] << std::endl;
292 if(mask_opt.size())
293 std::cout << "mask filename: " << mask_opt[0] << std::endl;
294 std::cout << "training vector file: " << std::endl;
295 for(int ifile=0;ifile<training_opt.size();++ifile)
296 std::cout << training_opt[ifile] << std::endl;
297 std::cout << "verbose: " << verbose_opt[0] << std::endl;
298 }
299 unsigned short nbag=(training_opt.size()>1)?training_opt.size():bag_opt[0];
300 if(verbose_opt[0]>=1)
301 std::cout << "number of bootstrap aggregations: " << nbag << std::endl;
302
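//if a mask is provided and opens as an OGR vector, keep its first layer and bounding box for later use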
303 ImgReaderOgr extentReader;
304 OGRLayer *readLayer;
305
306 double ulx=0;
307 double uly=0;
308 double lrx=0;
309 double lry=0;
310
311 bool maskIsVector=false;
312 if(mask_opt.size()){
313 try{
314 extentReader.open(mask_opt[0]);
315 maskIsVector=true;
316 readLayer = extentReader.getDataSource()->GetLayer(0);
317 if(!(extentReader.getExtent(ulx,uly,lrx,lry))){
318 cerr << "Error: could not get extent from " << mask_opt[0] << endl;
319 exit(1);
320 }
321 }
322 catch(string errorString){
323 maskIsVector=false;
324 }
325 }
326
327 ImgWriterOgr activeWriter;
328 if(active_opt.size()){
329 prob_est_opt[0]=true;
330 ImgReaderOgr trainingReader(training_opt[0]);
331 activeWriter.open(active_opt[0],ogrformat_opt[0]);
332 activeWriter.createLayer(active_opt[0],trainingReader.getProjection(),wkbPoint,NULL);
333 activeWriter.copyFields(trainingReader);
334 }
335 vector<PosValue> activePoints(nactive_opt[0]);
336 for(int iactive=0;iactive<activePoints.size();++iactive){
337 activePoints[iactive].value=1.0;
338 activePoints[iactive].posx=0.0;
339 activePoints[iactive].posy=0.0;
340 }
341
342 unsigned int totalSamples=0;
343 unsigned int nactive=0;
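//one libsvm model and parameter set per bootstrap aggregation (bag)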
344 vector<struct svm_model*> svm(nbag);
345 vector<struct svm_parameter> param(nbag);
346
347 short nclass=0;
348 int nband=0;
349 int startBand=2;//first two bands represent X and Y pos
350
351 //normalize priors from command line
352 if(priors_opt.size()>1){//priors from argument list
353 priors.resize(priors_opt.size());
354 double normPrior=0;
355 for(short iclass=0;iclass<priors_opt.size();++iclass){
356 priors[iclass]=priors_opt[iclass];
357 normPrior+=priors[iclass];
358 }
359 //normalize
360 for(short iclass=0;iclass<priors_opt.size();++iclass)
361 priors[iclass]/=normPrior;
362 }
363
364 //convert start and end band options to vector of band indexes
365 try{
366 if(bstart_opt.size()){
367 if(bend_opt.size()!=bstart_opt.size()){
368 string errorstring="Error: options for start and end band indexes must be provided as pairs, missing end band";
369 throw(errorstring);
370 }
371 band_opt.clear();
372 for(int ipair=0;ipair<bstart_opt.size();++ipair){
373 if(bend_opt[ipair]<=bstart_opt[ipair]){
374 string errorstring="Error: index for end band must be larger than start band";
375 throw(errorstring);
376 }
377 for(int iband=bstart_opt[ipair];iband<=bend_opt[ipair];++iband)
378 band_opt.push_back(iband);
379 }
380 }
381 }
382 catch(string error){
383 cerr << error << std::endl;
384 exit(1);
385 }
386 //sort bands
387 if(band_opt.size())
388 std::sort(band_opt.begin(),band_opt.end());
389
390 map<string,short> classValueMap;
391 vector<std::string> nameVector;
392 if(classname_opt.size()){
393 assert(classname_opt.size()==classvalue_opt.size());
394 for(int iclass=0;iclass<classname_opt.size();++iclass)
395 classValueMap[classname_opt[iclass]]=classvalue_opt[iclass];
396 }
397
398 //----------------------------------- Training -------------------------------
399 ConfusionMatrix cm;//confusion matrix collecting (cross-)validation results
400 vector< vector<double> > offset(nbag);
401 vector< vector<double> > scale(nbag);
402 map<string,Vector2d<float> > trainingMap;
403 vector< Vector2d<float> > trainingPixels;//[class][sample][band]
404 vector<string> fields;
405
406 vector<struct svm_problem> prob(nbag);
407 vector<struct svm_node *> x_space(nbag);
408
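//Train one SVM per bag: each bag either reads its own training file or reuses the single
//training file, optionally balanced and subsampled (bagsize) to create diversity between bags.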
409 for(int ibag=0;ibag<nbag;++ibag){
410 //organize training data
411 if(ibag<training_opt.size()){//if bag contains new training pixels
412 trainingMap.clear();
413 trainingPixels.clear();
414 if(verbose_opt[0]>=1)
415 std::cout << "reading training vector file " << training_opt[ibag] << std::endl;
416 try{
417 ImgReaderOgr trainingReaderBag(training_opt[ibag]);
418 if(band_opt.size())
419 //todo: when tlayer_opt is provided, readDataImageOgr does not read any layer
420 totalSamples=trainingReaderBag.readDataImageOgr(trainingMap,fields,band_opt,label_opt[0],tlayer_opt,verbose_opt[0]);
421 else
422 totalSamples=trainingReaderBag.readDataImageOgr(trainingMap,fields,0,0,label_opt[0],tlayer_opt,verbose_opt[0]);
423 if(trainingMap.size()<2){
424 string errorstring="Error: could not read at least two classes from training file, did you provide class labels in training sample (see option label)?";
425 throw(errorstring);
426 }
427 trainingReaderBag.close();
428 }
429 catch(string error){
430 cerr << error << std::endl;
431 exit(1);
432 }
433 catch(std::exception& e){
434 std::cerr << "Error: ";
435 std::cerr << e.what() << std::endl;
436 std::cerr << CPLGetLastErrorMsg() << std::endl;
437 exit(1);
438 }
439 catch(...){
440 cerr << "error caught" << std::endl;
441 exit(1);
442 }
443
444 //convert map to vector
445 // short iclass=0;
446 if(verbose_opt[0]>1)
447 std::cout << "training pixels: " << std::endl;
448 map<string,Vector2d<float> >::iterator mapit=trainingMap.begin();
449 while(mapit!=trainingMap.end()){
450 //delete small classes
451 if((mapit->second).size()<minSize_opt[0]){
452 trainingMap.erase(mapit++);//post-increment keeps the iterator valid after erase
453 continue;
454 }
455 trainingPixels.push_back(mapit->second);
456 if(verbose_opt[0]>1)
457 std::cout << mapit->first << ": " << (mapit->second).size() << " samples" << std::endl;
458 ++mapit;
459 }
460 if(!ibag){
461 nclass=trainingPixels.size();
462 if(classname_opt.size())
463 assert(nclass==classname_opt.size());
464 nband=trainingPixels[0][0].size()-2;//X and Y//trainingPixels[0][0].size();
465 }
466 else{
467 assert(nclass==trainingPixels.size());
468 assert(nband==trainingPixels[0][0].size()-2);
469 }
470
471 //do not remove outliers here: could easily be obtained through ogr2ogr -where 'B2<110' output.shp input.shp
472 //balance training data
473 if(balance_opt[0]>0){
474 while(balance_opt.size()<nclass)
475 balance_opt.push_back(balance_opt.back());
476 if(random_opt[0])
477 srand(time(NULL));
478 totalSamples=0;
479 for(short iclass=0;iclass<nclass;++iclass){
480 if(trainingPixels[iclass].size()>balance_opt[iclass]){
481 while(trainingPixels[iclass].size()>balance_opt[iclass]){
482 int index=rand()%trainingPixels[iclass].size();
483 trainingPixels[iclass].erase(trainingPixels[iclass].begin()+index);
484 }
485 }
486 else{
487 int oldsize=trainingPixels[iclass].size();
488 for(int isample=trainingPixels[iclass].size();isample<balance_opt[iclass];++isample){
489 int index = rand()%oldsize;
490 trainingPixels[iclass].push_back(trainingPixels[iclass][index]);
491 }
492 }
493 totalSamples+=trainingPixels[iclass].size();
494 }
495 }
496
497 //set scale and offset
498 offset[ibag].resize(nband);
499 scale[ibag].resize(nband);
500 if(offset_opt.size()>1)
501 assert(offset_opt.size()==nband);
502 if(scale_opt.size()>1)
503 assert(scale_opt.size()==nband);
504 for(int iband=0;iband<nband;++iband){
505 if(verbose_opt[0]>=1)
506 std::cout << "scaling for band " << iband << std::endl;
507 offset[ibag][iband]=(offset_opt.size()==1)?offset_opt[0]:offset_opt[iband];
508 scale[ibag][iband]=(scale_opt.size()==1)?scale_opt[0]:scale_opt[iband];
509 //search for min and maximum
510 if(scale[ibag][iband]<=0){
511 float theMin=trainingPixels[0][0][iband+startBand];
512 float theMax=trainingPixels[0][0][iband+startBand];
513 for(short iclass=0;iclass<nclass;++iclass){
514 for(int isample=0;isample<trainingPixels[iclass].size();++isample){
515 if(theMin>trainingPixels[iclass][isample][iband+startBand])
516 theMin=trainingPixels[iclass][isample][iband+startBand];
517 if(theMax<trainingPixels[iclass][isample][iband+startBand])
518 theMax=trainingPixels[iclass][isample][iband+startBand];
519 }
520 }
521 offset[ibag][iband]=theMin+(theMax-theMin)/2.0;
522 scale[ibag][iband]=(theMax-theMin)/2.0;
523 if(verbose_opt[0]>=1){
524 std::cout << "Extreme image values for band " << iband << ": [" << theMin << "," << theMax << "]" << std::endl;
525 std::cout << "Using offset, scale: " << offset[ibag][iband] << ", " << scale[ibag][iband] << std::endl;
526 std::cout << "scaled values for band " << iband << ": [" << (theMin-offset[ibag][iband])/scale[ibag][iband] << "," << (theMax-offset[ibag][iband])/scale[ibag][iband] << "]" << std::endl;
527 }
528 }
529 }
530 }
531 else{//use same offset and scale
532 offset[ibag].resize(nband);
533 scale[ibag].resize(nband);
534 for(int iband=0;iband<nband;++iband){
535 offset[ibag][iband]=offset[0][iband];
536 scale[ibag][iband]=scale[0][iband];
537 }
538 }
539
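//bookkeeping done once, for the first bag: default equal priors, per-class bag sizes, and class names for the confusion matrix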
540 if(!ibag){
541 if(priors_opt.size()==1){//default: equal priors for each class
542 priors.resize(nclass);
543 for(short iclass=0;iclass<nclass;++iclass)
544 priors[iclass]=1.0/nclass;
545 }
546 assert(priors_opt.size()==1||priors_opt.size()==nclass);
547
548 //set bagsize for each class if not done already via command line
549 while(bagSize_opt.size()<nclass)
550 bagSize_opt.push_back(bagSize_opt.back());
551
552 if(verbose_opt[0]>=1){
553 std::cout << "number of bands: " << nband << std::endl;
554 std::cout << "number of classes: " << nclass << std::endl;
555 if(priorimg_opt.empty()){
556 std::cout << "priors:";
557 for(short iclass=0;iclass<nclass;++iclass)
558 std::cout << " " << priors[iclass];
559 std::cout << std::endl;
560 }
561 }
562 map<string,Vector2d<float> >::iterator mapit=trainingMap.begin();
563 bool doSort=true;
564 try{
565 while(mapit!=trainingMap.end()){
566 nameVector.push_back(mapit->first);
567 if(classValueMap.size()){
568 //check if name in training is covered by classname_opt (values can not be 0)
569 if(classValueMap[mapit->first]>0){
570 if(cm.getClassIndex(type2string<short>(classValueMap[mapit->first]))<0){
571 cm.pushBackClassName(type2string<short>(classValueMap[mapit->first]),doSort);
572 }
573 }
574 else{
575 std::cerr << "Error: names in classname option are not complete, please check names in training vector and make sure classvalue is > 0" << std::endl;
576 exit(1);
577 }
578 }
579 else
580 cm.pushBackClassName(mapit->first,doSort);
581 ++mapit;
582 }
583 }
584 catch(BadConversion conversionString){
585 std::cerr << "Error: did you provide class names (-c) and integer values (-r) as pairs for each class in the training vector?" << std::endl;
586 exit(1);
587 }
588 if(classname_opt.empty()){
589 //std::cerr << "Warning: no class name and value pair provided for all " << nclass << " classes, using string2type<int> instead!" << std::endl;
590 for(int iclass=0;iclass<nclass;++iclass){
591 if(verbose_opt[0])
592 std::cout << iclass << " " << cm.getClass(iclass) << " -> " << string2type<short>(cm.getClass(iclass)) << std::endl;
593 classValueMap[cm.getClass(iclass)]=string2type<short>(cm.getClass(iclass));
594 }
595 }
596
597 // if(priors_opt.size()==nameVector.size()){
598 // std::cerr << "Warning: please check if priors are provided in correct order!!!" << std::endl;
599 // for(int iclass=0;iclass<nameVector.size();++iclass)
600 // std::cerr << nameVector[iclass] << " " << priors_opt[iclass] << std::endl;
601 // }
602 }//if(!ibag)
603
604 //Calculate features of training set
605 vector< Vector2d<float> > trainingFeatures(nclass);
606 for(short iclass=0;iclass<nclass;++iclass){
607 int nctraining=0;
608 if(verbose_opt[0]>=1)
609 std::cout << "calculating features for class " << iclass << std::endl;
610 if(random_opt[0])
611 srand(time(NULL));
612 nctraining=(bagSize_opt[iclass]<100)? trainingPixels[iclass].size()/100.0*bagSize_opt[iclass] : trainingPixels[iclass].size();//bagSize_opt[iclass] given in % of training size
613 if(nctraining<=0)
614 nctraining=1;
615 assert(nctraining<=trainingPixels[iclass].size());
616 int index=0;
617 if(bagSize_opt[iclass]<100)
618 random_shuffle(trainingPixels[iclass].begin(),trainingPixels[iclass].end());
619 if(verbose_opt[0]>1)
620 std::cout << "nctraining (class " << iclass << "): " << nctraining << std::endl;
621 trainingFeatures[iclass].resize(nctraining);
622 for(int isample=0;isample<nctraining;++isample){
623 //scale pixel values according to scale and offset!!!
624 for(int iband=0;iband<nband;++iband){
625 float value=trainingPixels[iclass][isample][iband+startBand];
626 trainingFeatures[iclass][isample].push_back((value-offset[ibag][iband])/scale[ibag][iband]);
627 }
628 }
629 assert(trainingFeatures[iclass].size()==nctraining);
630 }
631
632 unsigned int nFeatures=trainingFeatures[0][0].size();
633 if(verbose_opt[0]>=1)
634 std::cout << "number of features: " << nFeatures << std::endl;
635 unsigned int ntraining=0;
636 for(short iclass=0;iclass<nclass;++iclass)
637 ntraining+=trainingFeatures[iclass].size();
638 if(verbose_opt[0]>=1)
639 std::cout << "training size over all classes: " << ntraining << std::endl;
640
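//Fill the libsvm problem: each sample is stored in x_space as a run of svm_node entries
//(1-based feature index, scaled value) terminated by a node with index -1; prob.y holds the class index.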
641 prob[ibag].l=ntraining;
642 prob[ibag].y = Malloc(double,prob[ibag].l);
643 prob[ibag].x = Malloc(struct svm_node *,prob[ibag].l);
644 x_space[ibag] = Malloc(struct svm_node,(nFeatures+1)*ntraining);
645 unsigned long int spaceIndex=0;
646 int lIndex=0;
647 for(short iclass=0;iclass<nclass;++iclass){
648 for(int isample=0;isample<trainingFeatures[iclass].size();++isample){
649 prob[ibag].x[lIndex]=&(x_space[ibag][spaceIndex]);
650 for(int ifeature=0;ifeature<nFeatures;++ifeature){
651 x_space[ibag][spaceIndex].index=ifeature+1;
652 x_space[ibag][spaceIndex].value=trainingFeatures[iclass][isample][ifeature];
653 ++spaceIndex;
654 }
655 x_space[ibag][spaceIndex++].index=-1;
656 prob[ibag].y[lIndex]=iclass;
657 ++lIndex;
658 }
659 }
660 assert(lIndex==prob[ibag].l);
661
662 //set SVM parameters through command line options
663 param[ibag].svm_type = svmMap[svm_type_opt[0]];
664 param[ibag].kernel_type = kernelMap[kernel_type_opt[0]];
665 param[ibag].degree = kernel_degree_opt[0];
666 param[ibag].gamma = (gamma_opt[0]>0)? gamma_opt[0] : 1.0/nFeatures;
667 param[ibag].coef0 = coef0_opt[0];
668 param[ibag].nu = nu_opt[0];
669 param[ibag].cache_size = cache_opt[0];
670 param[ibag].C = ccost_opt[0];
671 param[ibag].eps = epsilon_tol_opt[0];
672 param[ibag].p = epsilon_loss_opt[0];
673 param[ibag].shrinking = (shrinking_opt[0])? 1 : 0;
674 param[ibag].probability = (prob_est_opt[0])? 1 : 0;
675 param[ibag].nr_weight = 0;//not used: I use priors and balancing
676 param[ibag].weight_label = NULL;
677 param[ibag].weight = NULL;
678 param[ibag].verbose=(verbose_opt[0]>1)? true:false;
679
680 if(verbose_opt[0]>1)
681 std::cout << "checking parameters" << std::endl;
682 const char *checkError=svm_check_parameter(&prob[ibag],&param[ibag]);if(checkError){cerr << "Error: " << checkError << std::endl;exit(1);}//svm_check_parameter returns an error message, or NULL when the parameters are valid
683 if(verbose_opt[0])
684 std::cout << "parameters ok, training" << std::endl;
685 svm[ibag]=svm_train(&prob[ibag],&param[ibag]);
686 if(verbose_opt[0]>1)
687 std::cout << "SVM is now trained" << std::endl;
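//optional n-fold cross validation on the training samples: predictions are accumulated in the confusion matrix cm, weighted by 1/nbag so the bags average out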
688 if(cv_opt[0]>1){
689 if(verbose_opt[0]>1)
690 std::cout << "Cross validating" << std::endl;
691 double *target = Malloc(double,prob[ibag].l);
692 svm_cross_validation(&prob[ibag],&param[ibag],cv_opt[0],target);
693 assert(param[ibag].svm_type != EPSILON_SVR&&param[ibag].svm_type != NU_SVR);//confusion matrix reporting only supports classification, not regression
694
695 for(int i=0;i<prob[ibag].l;i++){
696 string refClassName=nameVector[prob[ibag].y[i]];
697 string className=nameVector[target[i]];
698 if(classValueMap.size())
699 cm.incrementResult(type2string<short>(classValueMap[refClassName]),type2string<short>(classValueMap[className]),1.0/nbag);
700 else
701 cm.incrementResult(cm.getClass(prob[ibag].y[i]),cm.getClass(target[i]),1.0/nbag);
702 }
703 free(target);
704 }
705 // *NOTE* Because svm_model contains pointers to svm_problem, you can
706 // not free the memory used by svm_problem if you are still using the
707 // svm_model produced by svm_train().
708 }//for ibag
709 if(cv_opt[0]>1){
710 assert(cm.nReference());
711 cm.setFormat(cmformat_opt[0]);
712 cm.reportSE95(false);
713 std::cout << cm << std::endl;
714 // cout << "class #samples userAcc prodAcc" << endl;
715 // double se95_ua=0;
716 // double se95_pa=0;
717 // double se95_oa=0;
718 // double dua=0;
719 // double dpa=0;
720 // double doa=0;
721 // for(short iclass=0;iclass<cm.nClasses();++iclass){
722 // dua=cm.ua(cm.getClass(iclass),&se95_ua);
723 // dpa=cm.pa(cm.getClass(iclass),&se95_pa);
724 // cout << cm.getClass(iclass) << " " << cm.nReference(cm.getClass(iclass)) << " " << dua << " (" << se95_ua << ")" << " " << dpa << " (" << se95_pa << ")" << endl;
725 // }
726 // std::cout << "Kappa: " << cm.kappa() << std::endl;
727 // doa=cm.oa(&se95_oa);
728 // std::cout << "Overall Accuracy: " << 100*doa << " (" << 100*se95_oa << ")" << std::endl;
729 }
730
731 //--------------------------------- end of training -----------------------------------
732 if(input_opt.empty())
733 exit(0);
734
735 const char* pszMessage;
736 void* pProgressArg=NULL;
737 GDALProgressFunc pfnProgress=GDALTermProgress;
738 float progress=0;
739 if(!verbose_opt[0])
740 pfnProgress(progress,pszMessage,pProgressArg);
741 //-------------------------------- open image file ------------------------------------
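//determine whether the input opens as an OGR vector dataset; if opening fails, it is assumed to be a raster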
742 bool inputIsRaster=false;
743 ImgReaderOgr imgReaderOgr;
744 try{
745 imgReaderOgr.open(input_opt[0]);
746 imgReaderOgr.close();
747 }
748 catch(string errorString){
749 inputIsRaster=true;
750 }
751 if(!inputIsRaster){//classify vector file
752 cm.clearResults();
753 //notice that fields have already been set by readDataImageOgr (taking into account appropriate bands)
754 for(int ivalidation=0;ivalidation<input_opt.size();++ivalidation){
755 if(output_opt.size())
756 assert(output_opt.size()==input_opt.size());
757 if(verbose_opt[0])
758 std::cout << "opening img reader " << input_opt[ivalidation] << std::endl;
759 imgReaderOgr.open(input_opt[ivalidation]);
760 ImgWriterOgr imgWriterOgr;
761
762 if(output_opt.size()){
763 if(verbose_opt[0])
764 std::cout << "opening img writer and copying fields from img reader " << output_opt[ivalidation] << std::endl;
765 imgWriterOgr.open(output_opt[ivalidation],imgReaderOgr);
766 }
767 if(verbose_opt[0])
768 cout << "number of layers in input ogr file: " << imgReaderOgr.getLayerCount() << endl;
769 for(int ilayer=0;ilayer<imgReaderOgr.getLayerCount();++ilayer){
770 if(verbose_opt[0])
771 cout << "processing input layer " << ilayer << endl;
772 if(output_opt.size()){
773 if(verbose_opt[0])
774 std::cout << "creating field class" << std::endl;
775 if(classValueMap.size())
776 imgWriterOgr.createField("class",OFTInteger,ilayer);
777 else
778 imgWriterOgr.createField("class",OFTString,ilayer);
779 }
780 unsigned int nFeatures=imgReaderOgr.getFeatureCount(ilayer);
781 unsigned int ifeature=0;
782 progress=0;
783 pfnProgress(progress,pszMessage,pProgressArg);
784 OGRFeature *poFeature;
785 while( (poFeature = imgReaderOgr.getLayer(ilayer)->GetNextFeature()) != NULL ){
786 if(verbose_opt[0]>1)
787 std::cout << "feature " << ifeature << std::endl;
788 if( poFeature == NULL ){
789 cout << "Warning: could not read feature " << ifeature << " in layer " << imgReaderOgr.getLayerName(ilayer) << endl;
790 continue;
791 }
792 OGRFeature *poDstFeature = NULL;
793 if(output_opt.size()){
794 poDstFeature=imgWriterOgr.createFeature(ilayer);
795 if( poDstFeature->SetFrom( poFeature, TRUE ) != OGRERR_NONE ){
796 CPLError( CE_Failure, CPLE_AppDefined,
797 "Unable to translate feature %d from layer %s.\n",
798 poFeature->GetFID(), imgWriterOgr.getLayerName(ilayer).c_str() );
799 OGRFeature::DestroyFeature( poFeature );
800 OGRFeature::DestroyFeature( poDstFeature );
801 }
802 }
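//read the feature's band values (the fields selected during training), scale them per bag, and classify with each bag's SVM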
803 vector<float> validationPixel;
804 vector<float> validationFeature;
805
806 imgReaderOgr.readData(validationPixel,OFTReal,fields,poFeature,ilayer);
807 assert(validationPixel.size()==nband);
808 vector<float> probOut(nclass);//posterior prob for each class
809 for(short iclass=0;iclass<nclass;++iclass)
810 probOut[iclass]=(comb_opt[0]==1)? 1 : 0;//product rule needs a neutral start value, otherwise the result stays zero
811 for(int ibag=0;ibag<nbag;++ibag){
812 for(int iband=0;iband<nband;++iband){
813 validationFeature.push_back((validationPixel[iband]-offset[ibag][iband])/scale[ibag][iband]);
814 if(verbose_opt[0]==2)
815 std::cout << " " << validationFeature.back();
816 }
817 if(verbose_opt[0]==2)
818 std::cout << std::endl;
819 vector<double> result(nclass);
820 struct svm_node *x;
821 x = (struct svm_node *) malloc((validationFeature.size()+1)*sizeof(struct svm_node));
822 for(int i=0;i<validationFeature.size();++i){
823 x[i].index=i+1;
824 x[i].value=validationFeature[i];
825 }
826
827 x[validationFeature.size()].index=-1;//to end svm feature vector
828 double predict_label=0;
829 if(!prob_est_opt[0]){
830 predict_label = svm_predict(svm[ibag],x);
831 for(short iclass=0;iclass<nclass;++iclass){
832 if(iclass==static_cast<short>(predict_label))
833 result[iclass]=1;
834 else
835 result[iclass]=0;
836 }
837 }
838 else{
839 assert(svm_check_probability_model(svm[ibag]));
840 predict_label = svm_predict_probability(svm[ibag],x,&(result[0]));
841 }
842 if(verbose_opt[0]>1){
843 std::cout << "predict_label: " << predict_label << std::endl;
844 for(int iclass=0;iclass<result.size();++iclass)
845 std::cout << result[iclass] << " ";
846 std::cout << std::endl;
847 }
848
849 //calculate posterior prob of bag
850 for(short iclass=0;iclass<nclass;++iclass){
851 switch(comb_opt[0]){
852 default:
853 case(0)://sum rule
854 probOut[iclass]+=result[iclass]*priors[iclass];//add probabilities for each bag
855 break;
856 case(1)://product rule
857 probOut[iclass]*=pow(static_cast<float>(priors[iclass]),static_cast<float>(1.0-nbag)/nbag)*result[iclass];//multiply probabilities for each bag
858 break;
859 case(2)://max rule
860 if(priors[iclass]*result[iclass]>probOut[iclass])
861 probOut[iclass]=priors[iclass]*result[iclass];
862 break;
863 }
864 }
865 free(x);
866 }//for ibag
867
868 //search for max class prob
869 float maxBag=0;
870 float normBag=0;
871 string classOut="Unclassified";
872 for(short iclass=0;iclass<nclass;++iclass){
873 if(verbose_opt[0]>1)
874 std::cout << probOut[iclass] << " ";
875 if(probOut[iclass]>maxBag){
876 maxBag=probOut[iclass];
877 classOut=nameVector[iclass];
878 }
879 }
880 //look for class name
881 if(verbose_opt[0]>1){
882 if(classValueMap.size())
883 std::cout << "->" << classValueMap[classOut] << std::endl;
884 else
885 std::cout << "->" << classOut << std::endl;
886 }
887 if(output_opt.size()){
888 if(classValueMap.size())
889 poDstFeature->SetField("class",classValueMap[classOut]);
890 else
891 poDstFeature->SetField("class",classOut.c_str());
892 poDstFeature->SetFID( poFeature->GetFID() );
893 }
894 int labelIndex=poFeature->GetFieldIndex(label_opt[0].c_str());
895 if(labelIndex>=0){
896 string classRef=poFeature->GetFieldAsString(labelIndex);
897 if(classRef!="0"){
898 if(classValueMap.size())
899 cm.incrementResult(type2string<short>(classValueMap[classRef]),type2string<short>(classValueMap[classOut]),1);
900 else
901 cm.incrementResult(classRef,classOut,1);
902 }
903 }
904 CPLErrorReset();
905 if(output_opt.size()){
906 if(imgWriterOgr.createFeature(poDstFeature,ilayer) != OGRERR_NONE){
907 CPLError( CE_Failure, CPLE_AppDefined,
908 "Unable to translate feature %d from layer %s.\n",
909 poFeature->GetFID(), imgWriterOgr.getLayerName(ilayer).c_str() );
910 //do not destroy the features here: they are destroyed unconditionally at the end of the loop
912 }
913 }
914 ++ifeature;
915 if(!verbose_opt[0]){
916 progress=static_cast<float>(ifeature+1.0)/nFeatures;
917 pfnProgress(progress,pszMessage,pProgressArg);
918 }
919 OGRFeature::DestroyFeature( poFeature );
920 OGRFeature::DestroyFeature( poDstFeature );
921 }//get next feature
922 }//next layer
923 imgReaderOgr.close();
924 if(output_opt.size())
925 imgWriterOgr.close();
926 }
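//report the confusion matrix with user's and producer's accuracy, kappa and overall accuracy when reference labels were found in the input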
927 if(cm.nReference()){
928 std::cout << cm << std::endl;
929 cout << "class #samples userAcc prodAcc" << endl;
930 double se95_ua=0;
931 double se95_pa=0;
932 double se95_oa=0;
933 double dua=0;
934 double dpa=0;
935 double doa=0;
936 for(short iclass=0;iclass<cm.nClasses();++iclass){
937 dua=cm.ua_pct(cm.getClass(iclass),&se95_ua);
938 dpa=cm.pa_pct(cm.getClass(iclass),&se95_pa);
939 cout << cm.getClass(iclass) << " " << cm.nReference(cm.getClass(iclass)) << " " << dua << " (" << se95_ua << ")" << " " << dpa << " (" << se95_pa << ")" << endl;
940 }
941 std::cout << "Kappa: " << cm.kappa() << std::endl;
942 doa=cm.oa(&se95_oa);
943 std::cout << "Overall Accuracy: " << 100*doa << " (" << 100*se95_oa << ")" << std::endl;
944 }
945 }
946 try{
947 if(active_opt.size())
948 activeWriter.close();
949 }
950 catch(string errorString){
951 std::cerr << "Error: " << errorString << std::endl;
952 }
953
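//release the libsvm parameter structures, problem arrays and trained models for each bag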
954 for(int ibag=0;ibag<nbag;++ibag){
955 // svm_destroy_param[ibag](&param[ibag]);
956 svm_destroy_param(&param[ibag]);
957 free(prob[ibag].y);
958 free(prob[ibag].x);
959 free(x_space[ibag]);
960 svm_free_and_destroy_model(&(svm[ibag]));
961 }
962 return 0;
963}