00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include <algorithm>
00021 #include <cmath>
00022 #include <fstream>
00023 #include <sstream>
00024 #include <stdexcept>
00025
00026 #if defined __MINGW32__ && defined WITH_THREADS
00027 #include <boost/thread/locks.hpp>
00028 #endif // WITH_THREADS
00029
00030 #include "FeatureVector.h"
00031 #include "util/string_piece_hash.hh"
00032 #include "util/string_stream.hh"
00033
00034 using namespace std;
00035
00036
00037 namespace Moses
00038 {
00039
00040 const string FName::SEP = "_";
00041 FName::Name2Id FName::name2id;
00042 vector<string> FName::id2name;
00043 FName::Id2Count FName::id2hopeCount;
00044 FName::Id2Count FName::id2fearCount;
00045 #ifdef WITH_THREADS
00046 boost::shared_mutex FName::m_idLock;
00047 #endif
00048
00049 void FName::init(const StringPiece &name)
00050 {
00051 #ifdef WITH_THREADS
00052
00053 boost::shared_lock<boost::shared_mutex> lock(m_idLock);
00054 #endif
00055 Name2Id::iterator i = FindStringPiece(name2id, name);
00056 if (i != name2id.end()) {
00057 m_id = i->second;
00058 } else {
00059 #ifdef WITH_THREADS
00060
00061 lock.unlock();
00062 boost::unique_lock<boost::shared_mutex> write_lock(m_idLock);
00063 #endif
00064 std::pair<std::string, size_t> to_ins;
00065 to_ins.first.assign(name.data(), name.size());
00066 to_ins.second = name2id.size();
00067 std::pair<Name2Id::iterator, bool> res(name2id.insert(to_ins));
00068 if (res.second) {
00069
00070 id2name.push_back(to_ins.first);
00071 }
00072 m_id = res.first->second;
00073 }
00074 }
00075
00076 size_t FName::getId(const string& name)
00077 {
00078 Name2Id::iterator i = name2id.find(name);
00079 assert (i != name2id.end());
00080 return i->second;
00081 }
00082
00083 size_t FName::getHopeIdCount(const string& name)
00084 {
00085 Name2Id::iterator i = name2id.find(name);
00086 if (i != name2id.end()) {
00087 float id = i->second;
00088 return id2hopeCount[id];
00089 }
00090 return 0;
00091 }
00092
00093 size_t FName::getFearIdCount(const string& name)
00094 {
00095 Name2Id::iterator i = name2id.find(name);
00096 if (i != name2id.end()) {
00097 float id = i->second;
00098 return id2fearCount[id];
00099 }
00100 return 0;
00101 }
00102
00103 void FName::incrementHopeId(const string& name)
00104 {
00105 Name2Id::iterator i = name2id.find(name);
00106 assert(i != name2id.end());
00107 #ifdef WITH_THREADS
00108
00109 boost::upgrade_lock<boost::shared_mutex> upgradeLock(m_idLock);
00110 boost::upgrade_to_unique_lock<boost::shared_mutex> uniqueLock(upgradeLock);
00111 #endif
00112 id2hopeCount[i->second] += 1;
00113 }
00114
00115 void FName::incrementFearId(const string& name)
00116 {
00117 Name2Id::iterator i = name2id.find(name);
00118 assert(i != name2id.end());
00119 #ifdef WITH_THREADS
00120
00121 boost::upgrade_lock<boost::shared_mutex> upgradeLock(m_idLock);
00122 boost::upgrade_to_unique_lock<boost::shared_mutex> uniqueLock(upgradeLock);
00123 #endif
00124 id2fearCount[i->second] += 1;
00125 }
00126
00127 void FName::eraseId(size_t id)
00128 {
00129 #ifdef WITH_THREADS
00130
00131 boost::upgrade_lock<boost::shared_mutex> upgradeLock(m_idLock);
00132 boost::upgrade_to_unique_lock<boost::shared_mutex> uniqueLock(upgradeLock);
00133 #endif
00134 id2hopeCount.erase(id);
00135 id2fearCount.erase(id);
00136 }
00137
00138 std::ostream& operator<<( std::ostream& out, const FName& name)
00139 {
00140 out << name.name();
00141 return out;
00142 }
00143
00144 size_t FName::hash() const
00145 {
00146 return boost::hash_value(m_id);
00147 }
00148
00149 const std::string& FName::name() const
00150 {
00151 return id2name[m_id];
00152 }
00153
00154
00155 bool FName::operator==(const FName& rhs) const
00156 {
00157 return m_id == rhs.m_id;
00158 }
00159
00160 bool FName::operator!=(const FName& rhs) const
00161 {
00162 return ! (*this == rhs);
00163 }
00164
00165 FVector::FVector(size_t coreFeatures) : m_coreFeatures(coreFeatures) {}
00166
00167 void FVector::resize(size_t newsize)
00168 {
00169 valarray<FValue> oldValues(m_coreFeatures);
00170 m_coreFeatures.resize(newsize);
00171 for (size_t i = 0; i < min(m_coreFeatures.size(), oldValues.size()); ++i) {
00172 m_coreFeatures[i] = oldValues[i];
00173 }
00174 }
00175
00176 void FVector::clear()
00177 {
00178 m_coreFeatures.resize(m_coreFeatures.size(), 0);
00179 m_features.clear();
00180 }
00181
00182 bool FVector::load(const std::string& filename)
00183 {
00184 clear();
00185 ifstream in (filename.c_str());
00186 if (!in) {
00187 return false;
00188 }
00189 string line;
00190 while(getline(in,line)) {
00191 if (line[0] == '#') continue;
00192 istringstream linestream(line);
00193 string namestring;
00194 FValue value;
00195 linestream >> namestring;
00196 linestream >> value;
00197 FName fname(namestring);
00198
00199 set(fname,value);
00200 }
00201 return true;
00202 }
00203
00204 void FVector::save(const string& filename) const
00205 {
00206 ofstream out(filename.c_str());
00207 if (!out) {
00208 util::StringStream msg;
00209 msg << "Unable to open " << filename;
00210 throw runtime_error(msg.str());
00211 }
00212 write(out);
00213 out.close();
00214 }
00215
00216 void FVector::write(ostream& out,const string& sep, const string& linesep) const
00217 {
00218 for (const_iterator i = cbegin(); i != cend(); ++i) {
00219 out << i->first << sep << i->second << linesep;
00220 }
00221 }
00222
00223 static bool equalsTolerance(FValue lhs, FValue rhs)
00224 {
00225 if (lhs == rhs) return true;
00226 static const FValue TOLERANCE = 1e-4;
00227 FValue diff = abs(lhs-rhs);
00228 FValue mean = (abs(lhs)+abs(rhs))/2;
00229
00230 return diff/mean < TOLERANCE ;
00231 }
00232
00233 bool FVector::operator== (const FVector& rhs) const
00234 {
00235 if (this == &rhs) {
00236 return true;
00237 }
00238 if (m_coreFeatures.size() != rhs.m_coreFeatures.size()) {
00239 return false;
00240 }
00241 for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
00242 if (!equalsTolerance(m_coreFeatures[i], rhs.m_coreFeatures[i])) return false;
00243 }
00244 for (const_iterator i = cbegin(); i != cend(); ++i) {
00245 if (!equalsTolerance(i->second,rhs.get(i->first))) return false;
00246 }
00247 for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) {
00248 if (!equalsTolerance(i->second, get(i->first))) return false;
00249 }
00250 return true;
00251 }
00252
00253 bool FVector::operator!= (const FVector& rhs) const
00254 {
00255 return ! (*this == rhs);
00256 }
00257
00258 ProxyFVector FVector::operator[](const FName& name)
00259 {
00260
00261
00262 return ProxyFVector(this, name);
00263 }
00264
00266 FValue& FVector::operator[](size_t index)
00267 {
00268 return m_coreFeatures[index];
00269 }
00270
00271
00272 FValue FVector::operator[](const FName& name) const
00273 {
00274 return get(name);
00275 }
00276
00277 FValue FVector::operator[](size_t index) const
00278 {
00279 return m_coreFeatures[index];
00280 }
00281
00282 ostream& FVector::print(ostream& out) const
00283 {
00284 out << "core=(";
00285 for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
00286 out << m_coreFeatures[i];
00287 if (i + 1 < m_coreFeatures.size()) {
00288 out << ",";
00289 }
00290 }
00291 out << ") ";
00292 for (const_iterator i = cbegin(); i != cend(); ++i) {
00293 if (i != cbegin())
00294 out << " ";
00295 out << i->first << "=" << i->second;
00296 }
00297 return out;
00298 }
00299
00300 ostream& operator<<(ostream& out, const FVector& fv)
00301 {
00302 return fv.print(out);
00303 }
00304
00305 const FValue& FVector::get(const FName& name) const
00306 {
00307 static const FValue DEFAULT = 0;
00308 const_iterator fi = m_features.find(name);
00309 if (fi == m_features.end()) {
00310 return DEFAULT;
00311 } else {
00312 return fi->second;
00313 }
00314 }
00315
00316 FValue FVector::getBackoff(const FName& name, float backoff) const
00317 {
00318 const_iterator fi = m_features.find(name);
00319 if (fi == m_features.end()) {
00320 return backoff;
00321 } else {
00322 return fi->second;
00323 }
00324 }
00325
00326 void FVector::thresholdScale(FValue maxValue )
00327 {
00328 FValue factor = 1.0;
00329 for (const_iterator i = cbegin(); i != cend(); ++i) {
00330 FValue value = i->second;
00331 if (abs(value)*factor > maxValue) {
00332 factor = abs(value) / maxValue;
00333 }
00334 }
00335 operator*=(factor);
00336 }
00337
00338 void FVector::capMax(FValue maxValue)
00339 {
00340 for (const_iterator i = cbegin(); i != cend(); ++i)
00341 if (i->second > maxValue)
00342 set(i->first, maxValue);
00343 }
00344
00345 void FVector::capMin(FValue minValue)
00346 {
00347 for (const_iterator i = cbegin(); i != cend(); ++i)
00348 if (i->second < minValue)
00349 set(i->first, minValue);
00350 }
00351
00352 void FVector::set(const FName& name, const FValue& value)
00353 {
00354 m_features[name] = value;
00355 }
00356
00357 void FVector::printCoreFeatures()
00358 {
00359 cerr << "core=(";
00360 for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
00361 cerr << m_coreFeatures[i];
00362 if (i + 1 < m_coreFeatures.size()) {
00363 cerr << ",";
00364 }
00365 }
00366 cerr << ") ";
00367 }
00368
00369 FVector& FVector::operator+= (const FVector& rhs)
00370 {
00371 if (rhs.m_coreFeatures.size() > m_coreFeatures.size())
00372 resize(rhs.m_coreFeatures.size());
00373 for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i)
00374 set(i->first, get(i->first) + i->second);
00375 for (size_t i = 0; i < rhs.m_coreFeatures.size(); ++i)
00376 m_coreFeatures[i] += rhs.m_coreFeatures[i];
00377 return *this;
00378 }
00379
00380
00381 void FVector::sparsePlusEquals(const FVector& rhs)
00382 {
00383 for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i)
00384 set(i->first, get(i->first) + i->second);
00385 }
00386
00387
00388 void FVector::corePlusEquals(const FVector& rhs)
00389 {
00390 if (rhs.m_coreFeatures.size() > m_coreFeatures.size())
00391 resize(rhs.m_coreFeatures.size());
00392 for (size_t i = 0; i < rhs.m_coreFeatures.size(); ++i)
00393 m_coreFeatures[i] += rhs.m_coreFeatures[i];
00394 }
00395
00396
00397 void FVector::coreAssign(const FVector& rhs)
00398 {
00399 for (size_t i = 0; i < rhs.m_coreFeatures.size(); ++i)
00400 m_coreFeatures[i] = rhs.m_coreFeatures[i];
00401 }
00402
00403 void FVector::incrementSparseHopeFeatures()
00404 {
00405 for (const_iterator i = cbegin(); i != cend(); ++i)
00406 FName::incrementHopeId((i->first).name());
00407 }
00408
00409 void FVector::incrementSparseFearFeatures()
00410 {
00411 for (const_iterator i = cbegin(); i != cend(); ++i)
00412 FName::incrementFearId((i->first).name());
00413 }
00414
00415 void FVector::printSparseHopeFeatureCounts(std::ofstream& out)
00416 {
00417 for (const_iterator i = cbegin(); i != cend(); ++i)
00418 out << (i->first).name() << ": " << FName::getHopeIdCount((i->first).name()) << std::endl;
00419 }
00420
00421 void FVector::printSparseFearFeatureCounts(std::ofstream& out)
00422 {
00423 for (const_iterator i = cbegin(); i != cend(); ++i)
00424 out << (i->first).name() << ": " << FName::getFearIdCount((i->first).name()) << std::endl;
00425 }
00426
00427 void FVector::printSparseHopeFeatureCounts()
00428 {
00429 for (const_iterator i = cbegin(); i != cend(); ++i)
00430 std::cerr << (i->first).name() << ": " << FName::getHopeIdCount((i->first).name()) << std::endl;
00431 }
00432
00433 void FVector::printSparseFearFeatureCounts()
00434 {
00435 for (const_iterator i = cbegin(); i != cend(); ++i)
00436 std::cerr << (i->first).name() << ": " << FName::getFearIdCount((i->first).name()) << std::endl;
00437 }
00438
00439 size_t FVector::pruneSparseFeatures(size_t threshold)
00440 {
00441 size_t count = 0;
00442 vector<FName> toErase;
00443 for (const_iterator i = cbegin(); i != cend(); ++i) {
00444 const std::string& fname = (i->first).name();
00445 if (FName::getHopeIdCount(fname) < threshold && FName::getFearIdCount(fname) < threshold) {
00446 toErase.push_back(i->first);
00447 std::cerr << "pruning: " << fname << " (" << FName::getHopeIdCount(fname) << ", " << FName::getFearIdCount(fname) << ")" << std::endl;
00448 FName::eraseId(FName::getId(fname));
00449 ++count;
00450 }
00451 }
00452
00453 for (size_t i = 0; i < toErase.size(); ++i)
00454 m_features.erase(toErase[i]);
00455
00456 return count;
00457 }
00458
00459 size_t FVector::pruneZeroWeightFeatures()
00460 {
00461 size_t count = 0;
00462 vector<FName> toErase;
00463 for (const_iterator i = cbegin(); i != cend(); ++i) {
00464 const std::string& fname = (i->first).name();
00465 if (i->second == 0) {
00466 toErase.push_back(i->first);
00467
00468 FName::eraseId(FName::getId(fname));
00469 ++count;
00470 }
00471 }
00472
00473 for (size_t i = 0; i < toErase.size(); ++i)
00474 m_features.erase(toErase[i]);
00475
00476 return count;
00477 }
00478
00479 void FVector::updateConfidenceCounts(const FVector& weightUpdate, bool signedCounts)
00480 {
00481 for (size_t i = 0; i < weightUpdate.m_coreFeatures.size(); ++i) {
00482 if (signedCounts) {
00483
00484
00485 m_coreFeatures[i] += weightUpdate.m_coreFeatures[i];
00486 } else
00487
00488 m_coreFeatures[i] += abs(weightUpdate.m_coreFeatures[i]);
00489 }
00490
00491 for (const_iterator i = weightUpdate.cbegin(); i != weightUpdate.cend(); ++i) {
00492 if (weightUpdate[i->first] == 0)
00493 continue;
00494 float value = get(i->first);
00495 if (signedCounts) {
00496
00497
00498 value += weightUpdate[i->first];
00499 } else
00500
00501 value += abs(weightUpdate[i->first]);
00502 set(i->first, value);
00503 }
00504 }
00505
00506 void FVector::updateLearningRates(float decay_core, float decay_sparse, const FVector &confidenceCounts, float core_r0, float sparse_r0)
00507 {
00508 for (size_t i = 0; i < confidenceCounts.m_coreFeatures.size(); ++i) {
00509 m_coreFeatures[i] = 1.0/(1.0/core_r0 + decay_core * abs(confidenceCounts.m_coreFeatures[i]));
00510 }
00511
00512 for (const_iterator i = confidenceCounts.cbegin(); i != confidenceCounts.cend(); ++i) {
00513 float value = 1.0/(1.0/sparse_r0 + decay_sparse * abs(i->second));
00514 set(i->first, value);
00515 }
00516 }
00517
00518
00519 void FVector::setToBinaryOf(const FVector& rhs)
00520 {
00521 for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i)
00522 if (rhs.get(i->first) != 0)
00523 set(i->first, 1);
00524 for (size_t i = 0; i < rhs.m_coreFeatures.size(); ++i)
00525 m_coreFeatures[i] = 1;
00526 }
00527
00528
00529 FVector& FVector::coreDivideEquals(float scalar)
00530 {
00531 for (size_t i = 0; i < m_coreFeatures.size(); ++i)
00532 m_coreFeatures[i] /= scalar;
00533 return *this;
00534 }
00535
00536
00537 FVector& FVector::divideEquals(const FVector& rhs)
00538 {
00539 assert(m_coreFeatures.size() == rhs.m_coreFeatures.size());
00540 for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i)
00541 set(i->first, get(i->first)/rhs.get(i->first));
00542 for (size_t i = 0; i < rhs.m_coreFeatures.size(); ++i)
00543 m_coreFeatures[i] /= rhs.m_coreFeatures[i];
00544 return *this;
00545 }
00546
00547 FVector& FVector::operator-= (const FVector& rhs)
00548 {
00549 if (rhs.m_coreFeatures.size() > m_coreFeatures.size())
00550 resize(rhs.m_coreFeatures.size());
00551 for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i)
00552 set(i->first, get(i->first) -(i->second));
00553 for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
00554 if (i < rhs.m_coreFeatures.size()) {
00555 m_coreFeatures[i] -= rhs.m_coreFeatures[i];
00556 }
00557 }
00558 return *this;
00559 }
00560
00561 FVector& FVector::operator*= (const FVector& rhs)
00562 {
00563 if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) {
00564 resize(rhs.m_coreFeatures.size());
00565 }
00566 for (iterator i = begin(); i != end(); ++i) {
00567 FValue lhsValue = i->second;
00568 FValue rhsValue = rhs.get(i->first);
00569 set(i->first,lhsValue*rhsValue);
00570 }
00571 for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
00572 if (i < rhs.m_coreFeatures.size()) {
00573 m_coreFeatures[i] *= rhs.m_coreFeatures[i];
00574 } else {
00575 m_coreFeatures[i] = 0;
00576 }
00577 }
00578 return *this;
00579 }
00580
00581 FVector& FVector::operator/= (const FVector& rhs)
00582 {
00583 if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) {
00584 resize(rhs.m_coreFeatures.size());
00585 }
00586 for (iterator i = begin(); i != end(); ++i) {
00587 FValue lhsValue = i->second;
00588 FValue rhsValue = rhs.get(i->first);
00589 set(i->first, lhsValue / rhsValue) ;
00590 }
00591 for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
00592 if (i < rhs.m_coreFeatures.size()) {
00593 m_coreFeatures[i] /= rhs.m_coreFeatures[i];
00594 } else {
00595 if (m_coreFeatures[i] < 0) {
00596 m_coreFeatures[i] = -numeric_limits<FValue>::infinity();
00597 } else if (m_coreFeatures[i] > 0) {
00598 m_coreFeatures[i] = numeric_limits<FValue>::infinity();
00599 }
00600 }
00601 }
00602 return *this;
00603 }
00604
00605 FVector& FVector::operator*= (const FValue& rhs)
00606 {
00607
00608 for (iterator i = begin(); i != end(); ++i) {
00609 i->second *= rhs;
00610 }
00611 m_coreFeatures *= rhs;
00612 return *this;
00613 }
00614
00615 FVector& FVector::operator/= (const FValue& rhs)
00616 {
00617 for (iterator i = begin(); i != end(); ++i) {
00618 i->second /= rhs;
00619 }
00620 m_coreFeatures /= rhs;
00621 return *this;
00622 }
00623
00624 FVector& FVector::multiplyEqualsBackoff(const FVector& rhs, float backoff)
00625 {
00626 if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) {
00627 resize(rhs.m_coreFeatures.size());
00628 }
00629 for (iterator i = begin(); i != end(); ++i) {
00630 FValue lhsValue = i->second;
00631 FValue rhsValue = rhs.getBackoff(i->first, backoff);
00632 set(i->first,lhsValue*rhsValue);
00633 }
00634 for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
00635 if (i < rhs.m_coreFeatures.size()) {
00636 m_coreFeatures[i] *= rhs.m_coreFeatures[i];
00637 } else {
00638 m_coreFeatures[i] = 0;
00639 }
00640 }
00641 return *this;
00642 }
00643
00644 FVector& FVector::multiplyEquals(float core_r0, float sparse_r0)
00645 {
00646 for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
00647 m_coreFeatures[i] *= core_r0;
00648 }
00649 for (iterator i = begin(); i != end(); ++i)
00650 set(i->first,(i->second)*sparse_r0);
00651 return *this;
00652 }
00653
00654 FValue FVector::l1norm() const
00655 {
00656 FValue norm = 0;
00657 for (const_iterator i = cbegin(); i != cend(); ++i) {
00658 norm += abs(i->second);
00659 }
00660 for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
00661 norm += abs(m_coreFeatures[i]);
00662 }
00663 return norm;
00664 }
00665
00666 FValue FVector::l1norm_coreFeatures() const
00667 {
00668 FValue norm = 0;
00669
00670 for (size_t i = 0; i < m_coreFeatures.size()-1; ++i)
00671 norm += abs(m_coreFeatures[i]);
00672 return norm;
00673 }
00674
00675 FValue FVector::l2norm() const
00676 {
00677 return sqrt(inner_product(*this));
00678 }
00679
00680 FValue FVector::linfnorm() const
00681 {
00682 FValue norm = 0;
00683 for (const_iterator i = cbegin(); i != cend(); ++i) {
00684 float absValue = abs(i->second);
00685 if (absValue > norm)
00686 norm = absValue;
00687 }
00688 for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
00689 float absValue = abs(m_coreFeatures[i]);
00690 if (absValue > norm)
00691 norm = absValue;
00692 }
00693 return norm;
00694 }
00695
00696 size_t FVector::l1regularize(float lambda)
00697 {
00698 for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
00699 float value = m_coreFeatures[i];
00700 if (value > 0) {
00701 m_coreFeatures[i] = max(0.0f, value - lambda);
00702 } else {
00703 m_coreFeatures[i] = min(0.0f, value + lambda);
00704 }
00705 }
00706
00707 size_t numberPruned = size();
00708 vector<FName> toErase;
00709 for (iterator i = begin(); i != end(); ++i) {
00710 float value = i->second;
00711 if (value != 0.0f) {
00712 if (value > 0)
00713 value = max(0.0f, value - lambda);
00714 else
00715 value = min(0.0f, value + lambda);
00716
00717 if (value != 0.0f)
00718 i->second = value;
00719 else {
00720 toErase.push_back(i->first);
00721 const std::string& fname = (i->first).name();
00722 FName::eraseId(FName::getId(fname));
00723 }
00724 }
00725 }
00726
00727
00728 for (size_t i = 0; i < toErase.size(); ++i)
00729 m_features.erase(toErase[i]);
00730 numberPruned -= size();
00731 return numberPruned;
00732 }
00733
00734 void FVector::l2regularize(float lambda)
00735 {
00736 for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
00737 m_coreFeatures[i] *= (1 - lambda);
00738 }
00739
00740 for (iterator i = begin(); i != end(); ++i) {
00741 i->second *= (1 - lambda);
00742 }
00743 }
00744
00745 size_t FVector::sparseL1regularize(float lambda)
00746 {
00747
00748
00749
00750
00751
00752
00753
00754
00755
00756
00757 size_t numberPruned = size();
00758 vector<FName> toErase;
00759 for (iterator i = begin(); i != end(); ++i) {
00760 float value = i->second;
00761 if (value != 0.0f) {
00762 if (value > 0)
00763 value = max(0.0f, value - lambda);
00764 else
00765 value = min(0.0f, value + lambda);
00766
00767 if (value != 0.0f)
00768 i->second = value;
00769 else {
00770 toErase.push_back(i->first);
00771 const std::string& fname = (i->first).name();
00772 FName::eraseId(FName::getId(fname));
00773 }
00774 }
00775 }
00776
00777
00778 for (size_t i = 0; i < toErase.size(); ++i)
00779 m_features.erase(toErase[i]);
00780 numberPruned -= size();
00781 return numberPruned;
00782 }
00783
00784 void FVector::sparseL2regularize(float lambda)
00785 {
00786
00787
00788
00789
00790 for (iterator i = begin(); i != end(); ++i) {
00791 i->second *= (1 - lambda);
00792 }
00793 }
00794
00795 FValue FVector::sum() const
00796 {
00797 FValue sum = 0;
00798 for (const_iterator i = cbegin(); i != cend(); ++i) {
00799 sum += i->second;
00800 }
00801 sum += m_coreFeatures.sum();
00802 return sum;
00803 }
00804
00805 FValue FVector::inner_product(const FVector& rhs) const
00806 {
00807 assert(m_coreFeatures.size() == rhs.m_coreFeatures.size());
00808 FValue product = 0.0;
00809 for (const_iterator i = cbegin(); i != cend(); ++i) {
00810 product += ((i->second)*(rhs.get(i->first)));
00811 }
00812 for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
00813 product += m_coreFeatures[i]*rhs.m_coreFeatures[i];
00814 }
00815 return product;
00816 }
00817
00818 void FVector::merge(const FVector &other)
00819 {
00820
00821 for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
00822 FValue &thisVal = m_coreFeatures[i];
00823 const FValue otherVal = other.m_coreFeatures[i];
00824
00825 if (otherVal) {
00826 assert(thisVal == 0 || thisVal == otherVal);
00827 thisVal = otherVal;
00828 }
00829 }
00830
00831
00832 FNVmap::const_iterator iter;
00833 for (iter = other.m_features.begin(); iter != other.m_features.end(); ++iter) {
00834 const FName &otherKey = iter->first;
00835 const FValue otherVal = iter->second;
00836 m_features[otherKey] = otherVal;
00837 }
00838 }
00839
00840 const FVector operator+(const FVector& lhs, const FVector& rhs)
00841 {
00842 return FVector(lhs) += rhs;
00843 }
00844
00845 const FVector operator-(const FVector& lhs, const FVector& rhs)
00846 {
00847 return FVector(lhs) -= rhs;
00848 }
00849
00850 const FVector operator*(const FVector& lhs, const FVector& rhs)
00851 {
00852 return FVector(lhs) *= rhs;
00853 }
00854
00855 const FVector operator/(const FVector& lhs, const FVector& rhs)
00856 {
00857 return FVector(lhs) /= rhs;
00858 }
00859
00860
00861 const FVector operator*(const FVector& lhs, const FValue& rhs)
00862 {
00863 return FVector(lhs) *= rhs;
00864 }
00865
00866 const FVector operator/(const FVector& lhs, const FValue& rhs)
00867 {
00868 return FVector(lhs) /= rhs;
00869 }
00870
00871 FValue inner_product(const FVector& lhs, const FVector& rhs)
00872 {
00873 if (lhs.size() >= rhs.size()) {
00874 return rhs.inner_product(lhs);
00875 } else {
00876 return lhs.inner_product(rhs);
00877 }
00878 }
00879 }