00001 #include "RuleTrie.h"
00002
00003 #include <map>
00004 #include <vector>
00005
00006 #include <boost/functional/hash.hpp>
00007 #include <boost/unordered_map.hpp>
00008 #include <boost/version.hpp>
00009
00010 #include "moses/NonTerminal.h"
00011 #include "moses/TargetPhrase.h"
00012 #include "moses/TargetPhraseCollection.h"
00013 #include "moses/Util.h"
00014 #include "moses/Word.h"
00015
00016 namespace Moses
00017 {
00018 namespace Syntax
00019 {
00020 namespace T2S
00021 {
00022
00023 void RuleTrie::Node::Prune(std::size_t tableLimit)
00024 {
00025
00026 for (SymbolMap::iterator p = m_sourceTermMap.begin();
00027 p != m_sourceTermMap.end(); ++p) {
00028 p->second.Prune(tableLimit);
00029 }
00030 for (SymbolMap::iterator p = m_nonTermMap.begin();
00031 p != m_nonTermMap.end(); ++p) {
00032 p->second.Prune(tableLimit);
00033 }
00034
00035
00036 for (TPCMap::iterator p = m_targetPhraseCollections.begin();
00037 p != m_targetPhraseCollections.end(); ++p) {
00038 p->second->Prune(true, tableLimit);
00039 }
00040 }
00041
00042 void RuleTrie::Node::Sort(std::size_t tableLimit)
00043 {
00044
00045 for (SymbolMap::iterator p = m_sourceTermMap.begin();
00046 p != m_sourceTermMap.end(); ++p) {
00047 p->second.Sort(tableLimit);
00048 }
00049 for (SymbolMap::iterator p = m_nonTermMap.begin();
00050 p != m_nonTermMap.end(); ++p) {
00051 p->second.Sort(tableLimit);
00052 }
00053
00054
00055 for (TPCMap::iterator p = m_targetPhraseCollections.begin();
00056 p != m_targetPhraseCollections.end(); ++p) {
00057 p->second->Sort(true, tableLimit);
00058 }
00059 }
00060
00061 RuleTrie::Node*
00062 RuleTrie::Node::
00063 GetOrCreateChild(const Word &sourceTerm)
00064 {
00065 return &m_sourceTermMap[sourceTerm];
00066 }
00067
00068 RuleTrie::Node *
00069 RuleTrie::
00070 Node::
00071 GetOrCreateNonTerminalChild(const Word &targetNonTerm)
00072 {
00073 UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
00074 "Not a non-terminal: " << targetNonTerm);
00075
00076 return &m_nonTermMap[targetNonTerm];
00077 }
00078
00079 TargetPhraseCollection::shared_ptr
00080 RuleTrie::
00081 Node::
00082 GetOrCreateTargetPhraseCollection(const Word &sourceLHS)
00083 {
00084 UTIL_THROW_IF2(!sourceLHS.IsNonTerminal(),
00085 "Not a non-terminal: " << sourceLHS);
00086 TargetPhraseCollection::shared_ptr& foo
00087 = m_targetPhraseCollections[sourceLHS];
00088 if (!foo) foo.reset(new TargetPhraseCollection);
00089 return foo;
00090 }
00091
00092 RuleTrie::Node const*
00093 RuleTrie::
00094 Node::
00095 GetChild(const Word &sourceTerm) const
00096 {
00097 UTIL_THROW_IF2(sourceTerm.IsNonTerminal(), "Not a terminal: " << sourceTerm);
00098 SymbolMap::const_iterator p = m_sourceTermMap.find(sourceTerm);
00099 return (p == m_sourceTermMap.end()) ? NULL : &p->second;
00100 }
00101
00102 RuleTrie::Node const*
00103 RuleTrie::
00104 Node::
00105 GetNonTerminalChild(const Word &targetNonTerm) const
00106 {
00107 UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
00108 "Not a non-terminal: " << targetNonTerm);
00109 SymbolMap::const_iterator p = m_nonTermMap.find(targetNonTerm);
00110 return (p == m_nonTermMap.end()) ? NULL : &p->second;
00111 }
00112
00113 TargetPhraseCollection::shared_ptr
00114 RuleTrie::
00115 GetOrCreateTargetPhraseCollection
00116 ( const Word &sourceLHS, const Phrase &sourceRHS )
00117 {
00118 Node &currNode = GetOrCreateNode(sourceRHS);
00119 return currNode.GetOrCreateTargetPhraseCollection(sourceLHS);
00120 }
00121
00122 RuleTrie::Node &
00123 RuleTrie::
00124 GetOrCreateNode(const Phrase &sourceRHS)
00125 {
00126 const std::size_t size = sourceRHS.GetSize();
00127
00128 Node *currNode = &m_root;
00129 for (std::size_t pos = 0 ; pos < size ; ++pos) {
00130 const Word& word = sourceRHS.GetWord(pos);
00131
00132 if (word.IsNonTerminal()) {
00133 currNode = currNode->GetOrCreateNonTerminalChild(word);
00134 } else {
00135 currNode = currNode->GetOrCreateChild(word);
00136 }
00137
00138 UTIL_THROW_IF2(currNode == NULL, "Node not found at position " << pos);
00139 }
00140
00141 return *currNode;
00142 }
00143
00144 void RuleTrie::SortAndPrune(std::size_t tableLimit)
00145 {
00146 if (tableLimit) {
00147 m_root.Sort(tableLimit);
00148 }
00149 }
00150
00151 }
00152 }
00153 }