00001
00002
00003
00004
00005
00006
00007
00008 #include <sstream>
00009 #include "Rules.h"
00010 #include "ConsistentPhrase.h"
00011 #include "ConsistentPhrases.h"
00012 #include "AlignedSentence.h"
00013 #include "Rule.h"
00014 #include "Parameter.h"
00015 #include "moses/Util.h"
00016
00017 using namespace std;
00018
00019 extern bool g_debug;
00020
00021 Rules::Rules(const AlignedSentence &alignedSentence)
00022 :m_alignedSentence(alignedSentence)
00023 {
00024 }
00025
00026 Rules::~Rules()
00027 {
00028 Moses::RemoveAllInColl(m_keepRules);
00029 }
00030
00031 void Rules::CreateRules(const ConsistentPhrase &cp,
00032 const Parameter ¶ms)
00033 {
00034 if (params.hieroSourceLHS) {
00035 const NonTerm &nonTerm = cp.GetHieroNonTerm();
00036 CreateRule(nonTerm, params);
00037 } else {
00038 const ConsistentPhrase::NonTerms &nonTerms = cp.GetNonTerms();
00039 for (size_t i = 0; i < nonTerms.size(); ++i) {
00040 const NonTerm &nonTerm = nonTerms[i];
00041 CreateRule(nonTerm, params);
00042 }
00043 }
00044 }
00045
00046 void Rules::CreateRule(const NonTerm &nonTerm,
00047 const Parameter ¶ms)
00048 {
00049 Rule *rule = new Rule(nonTerm, m_alignedSentence);
00050
00051 rule->Prevalidate(params);
00052 rule->CreateTarget(params);
00053 rule->CreateProperties(params);
00054
00055 if (rule->CanRecurse()) {
00056 Extend(*rule, params);
00057 }
00058
00059 if (rule->IsValid()) {
00060 m_keepRules.insert(rule);
00061 } else {
00062 delete rule;
00063 }
00064
00065 }
00066
00067 void Rules::Extend(const Parameter ¶ms)
00068 {
00069 const ConsistentPhrases &allCPS = m_alignedSentence.GetConsistentPhrases();
00070
00071 size_t size = m_alignedSentence.GetPhrase(Moses::Input).size();
00072 for (size_t sourceStart = 0; sourceStart < size; ++sourceStart) {
00073 for (size_t sourceEnd = sourceStart; sourceEnd < size; ++sourceEnd) {
00074 const ConsistentPhrases::Coll &cps = allCPS.GetColl(sourceStart, sourceEnd);
00075
00076 ConsistentPhrases::Coll::const_iterator iter;
00077 for (iter = cps.begin(); iter != cps.end(); ++iter) {
00078 const ConsistentPhrase &cp = **iter;
00079 CreateRules(cp, params);
00080 }
00081 }
00082 }
00083 }
00084
00085 void Rules::Extend(const Rule &rule, const Parameter ¶ms)
00086 {
00087 const ConsistentPhrases &allCPS = m_alignedSentence.GetConsistentPhrases();
00088 int sourceMin = rule.GetNextSourcePosForNonTerm();
00089
00090 int ruleStart = rule.GetConsistentPhrase().corners[0];
00091 int ruleEnd = rule.GetConsistentPhrase().corners[1];
00092
00093 for (int sourceStart = sourceMin; sourceStart <= ruleEnd; ++sourceStart) {
00094 for (int sourceEnd = sourceStart; sourceEnd <= ruleEnd; ++sourceEnd) {
00095 if (sourceStart == ruleStart && sourceEnd == ruleEnd) {
00096
00097 continue;
00098 }
00099
00100 const ConsistentPhrases::Coll &cps = allCPS.GetColl(sourceStart, sourceEnd);
00101 Extend(rule, cps, params);
00102 }
00103 }
00104 }
00105
00106 void Rules::Extend(const Rule &rule, const ConsistentPhrases::Coll &cps, const Parameter ¶ms)
00107 {
00108 ConsistentPhrases::Coll::const_iterator iter;
00109 for (iter = cps.begin(); iter != cps.end(); ++iter) {
00110 const ConsistentPhrase &cp = **iter;
00111 Extend(rule, cp, params);
00112 }
00113 }
00114
00115 void Rules::Extend(const Rule &rule, const ConsistentPhrase &cp, const Parameter ¶ms)
00116 {
00117 const ConsistentPhrase::NonTerms &nonTerms = cp.GetNonTerms();
00118 for (size_t i = 0; i < nonTerms.size(); ++i) {
00119 const NonTerm &nonTerm = nonTerms[i];
00120
00121 Rule *newRule = new Rule(rule, nonTerm);
00122 newRule->Prevalidate(params);
00123 newRule->CreateTarget(params);
00124 newRule->CreateProperties(params);
00125
00126 if (newRule->CanRecurse()) {
00127
00128 Extend(*newRule, params);
00129 }
00130
00131 if (newRule->IsValid()) {
00132 m_keepRules.insert(newRule);
00133 } else {
00134 delete newRule;
00135 }
00136 }
00137 }
00138
00139 std::string Rules::Debug() const
00140 {
00141 stringstream out;
00142
00143 std::set<Rule*>::const_iterator iter;
00144 out << "m_keepRules:" << endl;
00145 for (iter = m_keepRules.begin(); iter != m_keepRules.end(); ++iter) {
00146 const Rule &rule = **iter;
00147 out << rule.Debug() << endl;
00148 }
00149
00150 return out.str();
00151 }
00152
00153 void Rules::Output(std::ostream &out, bool forward, const Parameter ¶ms) const
00154 {
00155 std::set<Rule*, CompareRules>::const_iterator iter;
00156 for (iter = m_mergeRules.begin(); iter != m_mergeRules.end(); ++iter) {
00157 const Rule &rule = **iter;
00158 rule.Output(out, forward);
00159 out << endl;
00160 }
00161 }
00162
00163 void Rules::Consolidate(const Parameter ¶ms)
00164 {
00165 if (params.fractionalCounting) {
00166 CalcFractionalCount();
00167 } else {
00168 std::set<Rule*>::iterator iter;
00169 for (iter = m_keepRules.begin(); iter != m_keepRules.end(); ++iter) {
00170 Rule &rule = **iter;
00171 rule.SetCount(1);
00172 }
00173 }
00174
00175 MergeRules(params);
00176 }
00177
00178 void Rules::MergeRules(const Parameter ¶ms)
00179 {
00180 typedef std::set<Rule*, CompareRules> MergeRules;
00181
00182 std::set<Rule*>::const_iterator iterOrig;
00183 for (iterOrig = m_keepRules.begin(); iterOrig != m_keepRules.end(); ++iterOrig) {
00184 Rule *origRule = *iterOrig;
00185
00186 pair<MergeRules::iterator, bool> inserted = m_mergeRules.insert(origRule);
00187 if (!inserted.second) {
00188
00189 Rule &rule = **inserted.first;
00190 float newCount = rule.GetCount() + origRule->GetCount();
00191 rule.SetCount(newCount);
00192 }
00193 }
00194 }
00195
00196 void Rules::CalcFractionalCount()
00197 {
00198 typedef std::set<Rule*> RuleColl;
00199 typedef std::map<const ConsistentPhrase*, RuleColl> RuleByConsistentPhrase;
00200 RuleByConsistentPhrase allRules;
00201
00202
00203 std::set<Rule*>::const_iterator iter;
00204 for (iter = m_keepRules.begin(); iter != m_keepRules.end(); ++iter) {
00205 Rule *rule = *iter;
00206 const ConsistentPhrase &cp = rule->GetConsistentPhrase();
00207 RuleColl &ruleColl = allRules[&cp];
00208 ruleColl.insert(rule);
00209 }
00210
00211
00212 RuleByConsistentPhrase::iterator iterOuter;
00213 for (iterOuter = allRules.begin(); iterOuter != allRules.end(); ++iterOuter) {
00214 RuleColl &rules = iterOuter->second;
00215
00216 RuleColl::iterator iterInner;
00217 for (iterInner = rules.begin(); iterInner != rules.end(); ++iterInner) {
00218 Rule &rule = **iterInner;
00219 rule.SetCount(1.0f / (float) rules.size());
00220 }
00221 }
00222
00223 }
00224
00225