// Data sets: http://repository.seasr.org/Datasets/UCI/arff/
// Tutorial: http://weka.wikispaces.com/Use+Weka+in+your+Java+code
// Further information: see the Weka API documentation
import java.io.BufferedReader;
import java.io.FileReader;
import weka.associations.Apriori;
import weka.associations.AprioriItemSet;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.classifiers.trees.J48;
import weka.clusterers.ClusterEvaluation;
import weka.clusterers.Clusterer;
import weka.clusterers.SimpleKMeans;
import weka.core.FastVector;
import weka.core.Instances;
import weka.filters.Filter;
/**
 * Small Weka walkthrough that runs three steps on one ARFF data set:
 * classification with a J48 tree, clustering with SimpleKMeans, and
 * association-rule mining with Apriori. All results are printed to
 * standard output.
 */
public class HelloWekaWorld {

    /**
     * ARFF file loaded when no path is passed on the command line.
     * This is a placeholder path and has to be adjusted to the local machine.
     */
    private static final String DEFAULT_ARFF_PATH = " (...) \\breast-cancer.arff";

    /** Label that precedes the number of incorrectly clustered instances in the report. */
    private static final String INCORRECTLY_CLUSTERED_LABEL = "Incorrectly clustered instances :";

    /**
     * Extracts the number of incorrectly clustered instances from the textual
     * clustering report and returns it as a number.
     *
     * <p>The value is the first whitespace-delimited token that follows the
     * label {@code "Incorrectly clustered instances :"}.
     *
     * @param clustering_result the report text, e.g. the string returned by
     *        {@code ClusterEvaluation.clusterResultsToString()}
     * @return the number found directly after the label
     * @throws NullPointerException if {@code clustering_result} is {@code null}
     * @throws IllegalArgumentException if the label is missing or no number follows it
     *         (a {@link NumberFormatException} is raised when the token is not a valid float)
     */
    public static float getIncorrectlyClusteredInstances(String clustering_result) {
        if (clustering_result == null) {
            throw new NullPointerException("clustering_result must not be null");
        }
        int labelStart = clustering_result.indexOf(INCORRECTLY_CLUSTERED_LABEL);
        if (labelStart < 0) {
            // Without this check, indexOf's -1 would silently shift the substring
            // start and produce a confusing parse error on unrelated text.
            throw new IllegalArgumentException(
                    "Label \"" + INCORRECTLY_CLUSTERED_LABEL + "\" not found in clustering result");
        }
        String remainder = clustering_result
                .substring(labelStart + INCORRECTLY_CLUSTERED_LABEL.length())
                .trim();
        if (remainder.length() == 0) {
            throw new IllegalArgumentException(
                    "No value follows \"" + INCORRECTLY_CLUSTERED_LABEL + "\"");
        }
        // Split on any whitespace so both tab- and space-separated reports are accepted.
        String firstToken = remainder.split("\\s+")[0];
        return Float.parseFloat(firstToken);
    }

    /**
     * Runs the classification, clustering and association-rule steps in order.
     *
     * @param args optional; {@code args[0]} is the path of the ARFF file to load.
     *        When absent, the built-in placeholder path is used.
     * @throws Exception if the file cannot be read or any Weka step fails
     */
    public static void main(String[] args) throws Exception {
        String arffPath = args.length > 0 ? args[0] : DEFAULT_ARFF_PATH;
        Instances data = loadData(arffPath);

        runClassification(data);
        runClustering(data);
        runAssociationRules(data);
    }

    /** Reads the ARFF file at {@code path} and marks the last attribute as the class. */
    private static Instances loadData(String path) throws Exception {
        BufferedReader reader = new BufferedReader(new FileReader(path));
        try {
            Instances data = new Instances(reader);
            data.setClassIndex(data.numAttributes() - 1);
            return data;
        } finally {
            // Release the file handle even when parsing fails.
            reader.close();
        }
    }

    /** Trains a J48 tree on one train/test split and prints predictions and metrics. */
    private static void runClassification(Instances data) throws Exception {
        // Only fold 0 of a 10-fold split is used here, not a full cross-validation.
        Instances trainData = data.trainCV(10, 0);
        Instances testData = data.testCV(10, 0);

        J48 tree = new J48();
        tree.setReducedErrorPruning(true);
        tree.setMinNumObj(5);
        Classifier c = tree;
        c.buildClassifier(trainData);

        // Print the predicted class value for every test instance.
        for (int i = 0; i < testData.numInstances(); i++) {
            double predictedClass = c.classifyInstance(testData.instance(i));
            System.out.println(i + " " + predictedClass);
        }

        Evaluation eval = new Evaluation(trainData);
        eval.evaluateModel(c, testData);
        System.out.println(eval.toSummaryString("\nResults\n======\n", false));
        for (int i = 0; i < data.numClasses(); i++) {
            System.out.println("Area Under ROC Curve (class=" + i + ")\t"
                    + eval.areaUnderROC(i));
        }
        // pctCorrect() is a percentage; divide by 100 to print a fraction.
        System.out.println("Accuracy:\t" + (eval.pctCorrect() / 100));
    }

    /** Clusters the data without its class attribute and prints the evaluation report. */
    private static void runClustering(Instances data) throws Exception {
        // Strip the class attribute before clustering; the index passed to the
        // Remove filter is the class index plus one.
        weka.filters.unsupervised.attribute.Remove filter =
                new weka.filters.unsupervised.attribute.Remove();
        filter.setAttributeIndices("" + (data.classIndex() + 1));
        filter.setInputFormat(data);
        Instances dataClusterer = Filter.useFilter(data, filter);

        SimpleKMeans kMeans = new SimpleKMeans();
        // Optional: kMeans.setDistanceFunction(new weka.core.ChebyshevDistance());
        kMeans.setNumClusters(2);
        Clusterer clus = kMeans;
        /* Also worth trying:
           Clusterer clus = new HierarchicalClusterer();
           ((HierarchicalClusterer) clus).setOptions(new String[]
               { "-N", "3", "-L", "SINGLE", "-A", "weka.core.ChebyshevDistance" });
           Link options: SINGLE COMPLETE AVERAGE
           Distance functions: ChebyshevDistance, EuclideanDistance, ManhattanDistance
        */
        clus.buildClusterer(dataClusterer);

        // Evaluate against the data that still carries the class attribute.
        ClusterEvaluation clusterEval = new ClusterEvaluation();
        clusterEval.setClusterer(clus);
        clusterEval.evaluateClusterer(data);

        // Build the report once and reuse it for printing and parsing.
        String clusterResults = clusterEval.clusterResultsToString();
        System.out.println(clusterResults);

        // Obtain the number of incorrectly clustered instances as a float.
        float clusteringError = getIncorrectlyClusteredInstances(clusterResults);
        System.out.println(clusteringError);
    }

    /** Mines association rules with Apriori and prints the model and one item set. */
    private static void runAssociationRules(Instances data) throws Exception {
        Apriori apriori = new Apriori();
        apriori.setOptions(new String[] {"-M", "0.3", "-C", "0.7", "-N", "10000"});
        apriori.buildAssociations(data);
        System.out.println(apriori.toString());

        FastVector[] rules = apriori.getAllTheRules();
        // Guard against an empty result; indexing blindly would throw.
        if (rules == null || rules.length == 0 || rules[0].size() == 0) {
            System.out.println("No association rules found.");
            return;
        }

        // Print the item array of the first entry in the first rule vector.
        int[] items = ((AprioriItemSet) rules[0].elementAt(0)).items();
        for (int i = 0; i < items.length; i++) {
            System.out.println(items[i]);
        }
    }
}
// Further exercises:
// - try out other classifiers,
// - carry the 10-fold cross-validation through to the end (all folds),
// - hyperparameter search