00001 #include "HyperPathLoader.h"
00002
00003 #include "TreeFragmentTokenizer.h"
00004
00005 namespace Moses
00006 {
00007 namespace Syntax
00008 {
00009 namespace F2S
00010 {
00011
00012 void HyperPathLoader::Load(const StringPiece &s, HyperPath &path)
00013 {
00014 path.nodeSeqs.clear();
00015
00016 m_tokenSeq.clear();
00017 for (TreeFragmentTokenizer p(s); p != TreeFragmentTokenizer(); ++p) {
00018 m_tokenSeq.push_back(*p);
00019 }
00020
00021 int height = DetermineHeight();
00022
00023 path.nodeSeqs.resize(height+1);
00024
00025 GenerateNodeTupleSeq(height);
00026
00027 for (int depth = 0; depth <= height; ++depth) {
00028 int prevParent = -1;
00029
00030
00031 for (std::vector<NodeTuple>::const_iterator p = m_nodeTupleSeq.begin();
00032 p != m_nodeTupleSeq.end(); ++p) {
00033 const NodeTuple &tuple = *p;
00034 if (tuple.depth != depth) {
00035 continue;
00036 }
00037 if (prevParent != -1 && tuple.parent != prevParent) {
00038 path.nodeSeqs[depth].push_back(HyperPath::kComma);
00039 }
00040 path.nodeSeqs[depth].push_back(tuple.symbol);
00041 prevParent = tuple.parent;
00042 }
00043 }
00044 }
00045
00046 int HyperPathLoader::DetermineHeight() const
00047 {
00048 int height = 0;
00049 int maxHeight = 0;
00050 std::size_t numTokens = m_tokenSeq.size();
00051 for (std::size_t i = 0; i < numTokens; ++i) {
00052 if (m_tokenSeq[i].type == TreeFragmentToken_LSB) {
00053 assert(i+2 < numTokens);
00054
00055
00056 if (m_tokenSeq[i+2].type != TreeFragmentToken_RSB) {
00057 maxHeight = std::max(++height, maxHeight);
00058 } else {
00059 i += 2;
00060 }
00061 } else if (m_tokenSeq[i].type == TreeFragmentToken_RSB) {
00062 --height;
00063 }
00064 }
00065 return maxHeight;
00066 }
00067
00068 void HyperPathLoader::GenerateNodeTupleSeq(int height)
00069 {
00070 m_nodeTupleSeq.clear();
00071
00072
00073 assert(m_parentStack.empty());
00074 m_parentStack.push(-1);
00075
00076
00077
00078 NodeTuple tuple;
00079 tuple.index = -1;
00080 tuple.parent = -1;
00081 tuple.depth = -1;
00082 tuple.symbol = HyperPath::kEpsilon;
00083
00084
00085 std::size_t numTokens = m_tokenSeq.size();
00086 for (std::size_t i = 0; i < numTokens; ++i) {
00087 if (m_tokenSeq[i].type == TreeFragmentToken_LSB) {
00088 assert(i+2 < numTokens);
00089
00090
00091 if (m_tokenSeq[i+2].type != TreeFragmentToken_RSB) {
00092 ++tuple.index;
00093 tuple.parent = m_parentStack.top();
00094 m_parentStack.push(tuple.index);
00095 ++tuple.depth;
00096 tuple.symbol = AddNonTerminalFactor(m_tokenSeq[++i].value)->GetId();
00097 m_nodeTupleSeq.push_back(tuple);
00098 } else {
00099 ++tuple.index;
00100 tuple.parent = m_parentStack.top();
00101 ++tuple.depth;
00102 tuple.symbol = AddNonTerminalFactor(m_tokenSeq[++i].value)->GetId();
00103 m_nodeTupleSeq.push_back(tuple);
00104
00105 if (tuple.depth < height) {
00106 int origDepth = tuple.depth;
00107 m_parentStack.push(tuple.index);
00108 for (int depth = origDepth+1; depth <= height; ++depth) {
00109 ++tuple.index;
00110 tuple.parent = m_parentStack.top();
00111 m_parentStack.push(tuple.index);
00112 tuple.depth = depth;
00113 tuple.symbol = HyperPath::kEpsilon;
00114 m_nodeTupleSeq.push_back(tuple);
00115 }
00116 for (int depth = origDepth; depth <= height; ++depth) {
00117 m_parentStack.pop();
00118 }
00119 tuple.depth = origDepth;
00120 }
00121 --tuple.depth;
00122
00123 ++i;
00124 }
00125 } else if (m_tokenSeq[i].type == TreeFragmentToken_WORD) {
00126
00127
00128
00129 ++tuple.index;
00130 tuple.parent = m_parentStack.top();
00131 ++tuple.depth;
00132 tuple.symbol = AddTerminalFactor(m_tokenSeq[i].value)->GetId();
00133 m_nodeTupleSeq.push_back(tuple);
00134
00135 if (m_tokenSeq[i+1].type == TreeFragmentToken_RSB &&
00136 tuple.depth < height) {
00137 int origDepth = tuple.depth;
00138 m_parentStack.push(tuple.index);
00139 for (int depth = origDepth+1; depth <= height; ++depth) {
00140 ++tuple.index;
00141 tuple.parent = m_parentStack.top();
00142 m_parentStack.push(tuple.index);
00143 tuple.depth = depth;
00144 tuple.symbol = HyperPath::kEpsilon;
00145 m_nodeTupleSeq.push_back(tuple);
00146 }
00147 for (int depth = origDepth; depth <= height; ++depth) {
00148 m_parentStack.pop();
00149 }
00150 tuple.depth = origDepth;
00151 }
00152 --tuple.depth;
00153 } else if (m_tokenSeq[i].type == TreeFragmentToken_RSB) {
00154 m_parentStack.pop();
00155 --tuple.depth;
00156 }
00157 }
00158
00159
00160 m_parentStack.pop();
00161 }
00162
00163 }
00164 }
00165 }