forked from qieangel2013/phpml
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.php
58 lines (39 loc) · 1.45 KB
/
index.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
<?php
require_once 'vendor/autoload.php';
use Phpml\Classification\KNearestNeighbors;
use Phpml\Dataset\CsvDataset;
use Phpml\Dataset\ArrayDataset;
use Phpml\FeatureExtraction\TokenCountVectorizer;
use Phpml\Tokenization\WordTokenizer;
use Phpml\CrossValidation\StratifiedRandomSplit;
use Phpml\FeatureExtraction\TfIdfTransformer;
use Phpml\Metric\Accuracy;
use Phpml\Classification\SVC;
use Phpml\Regression\SVR;
use Phpml\SupportVectorMachine\Kernel;
// Train an SVM text classifier on the sentences in languages.csv and print
// the label predicted for one ad-hoc query sentence.

// Second argument is the number of feature columns: one (the raw sentence).
$csvDataset = new CsvDataset('languages.csv', 1);
$vectorizer = new TokenCountVectorizer(new WordTokenizer());
$tfIdfTransformer = new TfIdfTransformer();

// Ad-hoc sentence to classify once the model has been trained.
// NOTE(review): WordTokenizer may not split Chinese text into separate words;
// confirm the query actually shares tokens with the training vocabulary.
$testSamples = ['我是中国人'];

// Every CSV sample is an array of features; element 0 is the sentence text.
$samples = [];
foreach ($csvDataset->getSamples() as $row) {
    $samples[] = $row[0];
}

// Learn the vocabulary from the training text only, then map both the
// training text and the query onto that same vocabulary. The query must NOT
// be passed to fit(): doing so would extend the vocabulary after the training
// vectors were built, so the query vector would no longer line up with the
// feature space the classifier is trained on.
$vectorizer->fit($samples);
$vectorizer->transform($samples);
$vectorizer->transform($testSamples);

// Fit the TF-IDF weights on the training vectors and apply the same weights
// to the query, so both are expressed in the same weighted feature space.
$tfIdfTransformer->fit($samples);
$tfIdfTransformer->transform($samples);
$tfIdfTransformer->transform($testSamples);

// Pair the vectorised samples with their original labels and hold out a
// stratified 10% of the data as a test set.
$dataset = new ArrayDataset($samples, $csvDataset->getTargets());
$randomSplit = new StratifiedRandomSplit($dataset, 0.1);

// RBF-kernel SVC; the second constructor argument (10000) is the cost value.
$classifier = new SVC(Kernel::RBF, 10000);
$classifier->train($randomSplit->getTrainSamples(), $randomSplit->getTrainLabels());

// NOTE(review): held-out predictions are computed but never reported; pass
// them with $randomSplit->getTestLabels() to Accuracy::score() if an
// evaluation figure is wanted.
$predictedLabels = $classifier->predict($randomSplit->getTestSamples());

// Classify the ad-hoc query and dump the predicted label(s).
$testPredictedLabels = $classifier->predict($testSamples);
print_r($testPredictedLabels);
exit;