-
Notifications
You must be signed in to change notification settings - Fork 81
Expand file tree
/
Copy pathQLearn.cpp
More file actions
106 lines (82 loc) * 3.31 KB
/
QLearn.cpp
File metadata and controls
106 lines (82 loc) * 3.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#include "../include/QLearn.h"

#include <stdlib.h>

#include <algorithm>
#include <chrono>
#include <cmath>
#include <fstream>
#include <iostream>
#include <thread>

#include "../include/NeuralNet.h"
using namespace rl;
/// Constructs a QLearn agent with one Q-value model per possible action.
/// Each model owns a fresh copy of modelNetwork paired with its action.
/// @param modelNetwork template network; copied once per action (not owned afterward)
/// @param trainer_ trainer used to update the models (borrowed, not owned)
/// @param learningRate_ step size applied to temporal-difference updates
/// @param devaluationFactor_ discount factor for future rewards
/// @param possibleActions_ the discrete action set the agent may choose from
// NOTE(review): the scraped source lost the template arguments on std::vector;
// restored as std::vector<Action> / std::vector<Model> to match usage below.
QLearn::QLearn(net::NeuralNet *modelNetwork, net::Trainer *trainer_, double learningRate_, double devaluationFactor_, std::vector<Action> possibleActions_) {
	trainer = trainer_;
	learningRate = learningRate_;
	devaluationFactor = devaluationFactor_;
	models = std::vector<Model>(possibleActions_.size());
	// One network copy per action so each action's expected reward is learned independently.
	for(unsigned int a = 0; a < possibleActions_.size(); a++) models[a] = Model(new net::NeuralNet(modelNetwork), possibleActions_[a]);
}
/// Constructs a QLearn agent from pre-built per-action models.
/// @param models_ one Model (network + action) per possible action
/// @param trainer_ trainer used to update the models (borrowed, not owned)
/// @param learningRate_ step size applied to temporal-difference updates
/// @param devaluationFactor_ discount factor for future rewards
// NOTE(review): the scraped source lost the template argument on std::vector;
// restored as std::vector<Model> to match the `models` member it is assigned to.
QLearn::QLearn(std::vector<Model> models_, net::Trainer *trainer_, double learningRate_, double devaluationFactor_) {
	models = models_;
	trainer = trainer_;
	learningRate = learningRate_;
	devaluationFactor = devaluationFactor_;
}
/// Default constructor. Leaves all members default-initialized; the object
/// must be configured (models, trainer, rates) before any action selection.
QLearn::QLearn() {
}
/// Greedy policy: returns the action whose model predicts the highest reward
/// for currentState. Records the state and chosen model for the next
/// applyReinforcementToLastAction call.
Action QLearn::chooseBestAction(State currentState) {
	lastState = currentState;
	const std::vector<double> predicted = getModelRewards(currentState);
	const unsigned int bestIndex = std::max_element(predicted.begin(), predicted.end()) - predicted.begin();
	lastModel = models[bestIndex];
	return lastModel.action;
}
/// Boltzmann (softmax) exploration policy: samples an action with probability
/// proportional to exp(predictedReward / explorationConstant).
/// Records the state and chosen model for the next applyReinforcementToLastAction call.
/// @param currentState the state to evaluate
/// @param explorationConstant softmax temperature; clamped to a minimum of 0.01
///        to avoid division-by-zero / overflow in the exponent
Action QLearn::chooseBoltzmanAction(State currentState, double explorationConstant) {
	if(explorationConstant < 0.01) {
		explorationConstant = 0.01;
	}
	double determiner = (double)rand() / (double)RAND_MAX;
	std::vector<double> rewards = getModelRewards(currentState);
	std::vector<double> exponentTerms(0);
	double sumOfExponentTerms = 0;
	std::for_each(rewards.begin(), rewards.end(), [&](double reward) {
		// std::exp from <cmath>; the original relied on a transitive include for exp().
		double exponentTerm = std::exp(reward / explorationConstant);
		exponentTerms.push_back(exponentTerm);
		sumOfExponentTerms += exponentTerm;
	});
	// Roulette-wheel selection over the normalized exponent terms.
	double sumOfProbabilities = 0;
	for(unsigned int a = 0; a < exponentTerms.size(); a++) {
		sumOfProbabilities += (exponentTerms[a] / sumOfExponentTerms);
		if(sumOfProbabilities >= determiner) {
			lastModel = models[a];
			lastState = currentState;
			return lastModel.action;
		}
	}
	/// In case a floating point error resulted in no action being selected.
	std::cout << "Floating point error when choosing an action using a Boltzmann selection policy! Choosing last action." << "\n";
	lastModel = models[models.size() - 1];
	lastState = currentState;
	return lastModel.action;
}
/// Performs a temporal-difference update on the last chosen action's model:
/// target = Q(lastState) + learningRate*(reward + devaluationFactor*max_a Q(newState) - Q(lastState)).
/// @param reward the observed reward for the last action
/// @param newState the state reached after taking the last action
void QLearn::applyReinforcementToLastAction(double reward, State newState) {
	if(lastState.size() == 0) {
		std::cout << "Called applyReinforcementToLastAction before an action had been selected! Because of this, this function call will be ignored.";
		// BUG FIX: the warning promised the call would be ignored, but execution
		// fell through and trained on an empty lastState. Return early instead.
		return;
	}
	double lr = lastModel.network->getOutput(lastState)[0];
	double targetValueForLastState = lr + learningRate*(reward+(devaluationFactor*getHighestReward(newState))-lr);
	trainer->train(lastModel.network, {lastState}, {{targetValueForLastState}});
}
void QLearn::reset() {
std::for_each(models.begin(), models.end(), [&](Model model) {
model.network->randomizeWeights();
});
}
/// Returns each model's predicted reward (first network output) for the given
/// state, in the same order as the `models` vector.
std::vector<double> QLearn::getModelRewards(State state) {
	std::vector<double> rewards;
	// Reserve once to avoid reallocation; iterate by const reference instead of
	// the original for_each lambda that copied each Model per element.
	rewards.reserve(models.size());
	for(const Model &model : models) {
		rewards.push_back(model.network->getOutput(state)[0]);
	}
	return rewards;
}
/// Returns the maximum reward any model predicts for the given state
/// (the greedy value max_a Q(state, a)).
double QLearn::getHighestReward(State state) {
	const std::vector<double> allRewards = getModelRewards(state);
	return *std::max_element(allRewards.begin(), allRewards.end());
}