00001 #pragma once
00002
00003 #include <string>
00004 #include <boost/foreach.hpp>
00005 #include "VWFeatureBase.h"
00006 #include "moses/InputType.h"
00007 #include "moses/TypeDef.h"
00008 #include "moses/Word.h"
00009
00010 namespace Moses
00011 {
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 class VWFeatureContext : public VWFeatureBase
00022 {
00023 public:
00024 VWFeatureContext(const std::string &line, size_t contextSize)
00025 : VWFeatureBase(line, vwft_targetContext), m_contextSize(contextSize) {
00026 }
00027
00028
00029
00030 virtual void operator()(const InputType &input
00031 , const TargetPhrase &targetPhrase
00032 , Discriminative::Classifier &classifier
00033 , Discriminative::FeatureVector &outFeatures) const {
00034 }
00035
00036 virtual void operator()(const InputType &input
00037 , const Range &sourceRange
00038 , Discriminative::Classifier &classifier
00039 , Discriminative::FeatureVector &outFeatures) const {
00040 }
00041
00042 virtual void SetParameter(const std::string& key, const std::string& value) {
00043 if (key == "size") {
00044 m_contextSize = Scan<size_t>(value);
00045 } else if (key == "factor-positions") {
00046
00047
00048
00049 Tokenize<size_t>(m_factorPositions, value, ",");
00050 } else {
00051 VWFeatureBase::SetParameter(key, value);
00052 }
00053 }
00054
00055 size_t GetContextSize() {
00056 return m_contextSize;
00057 }
00058
00059 protected:
00060
00061
00062
00063
00064
00065
00066 inline std::string GetWord(const Phrase &phrase, size_t posFromEnd) const {
00067 const Word &word = phrase.GetWord(phrase.GetSize() - posFromEnd - 1);
00068 if (m_factorPositions.empty()) {
00069 return word.GetString(m_targetFactors, false);
00070 } else {
00071 if (m_targetFactors.size() != 1)
00072 UTIL_THROW2("You can only use factor-positions when a single target-side factor is defined.");
00073 const std::string &fullFactor = word.GetFactor(m_targetFactors[0])->GetString().as_string();
00074
00075
00076
00077 if (fullFactor == BOS_ || fullFactor == EOS_ || fullFactor == UNKNOWN_FACTOR)
00078 return fullFactor;
00079
00080 std::string subFactor(m_factorPositions.size(), 'x');
00081 for (size_t i = 0; i < m_factorPositions.size(); i++)
00082 subFactor[i] = fullFactor[m_factorPositions[i]];
00083
00084 return subFactor;
00085 }
00086 }
00087
00088
00089 inline std::string GetSourceWord(const InputType &input, size_t pos) const {
00090 return input.GetWord(pos).GetString(m_sourceFactors, false);
00091 }
00092
00093
00094 std::vector<std::string> GetAlignedSourceWords(const Phrase &contextPhrase
00095 , const InputType &input
00096 , const AlignmentInfo &alignInfo
00097 , size_t posFromEnd) const {
00098 size_t idx = contextPhrase.GetSize() - posFromEnd - 1;
00099 std::set<size_t> alignedToTarget = alignInfo.GetAlignmentsForTarget(idx);
00100 std::vector<std::string> out;
00101 out.reserve(alignedToTarget.size());
00102 BOOST_FOREACH(size_t srcIdx, alignedToTarget) {
00103 out.push_back(GetSourceWord(input, srcIdx));
00104 }
00105 return out;
00106 }
00107
00108
00109 size_t m_contextSize;
00110
00111
00112
00113 std::vector<size_t> m_factorPositions;
00114 };
00115
00116 }