00001 #include <iostream>
00002
00003 #include "util/tokenize_piece.hh"
00004
00005 #include "ForestRescore.h"
00006 #include "MiraFeatureVector.h"
00007
00008 #define BOOST_TEST_MODULE MertForestRescore
00009 #include <boost/test/unit_test.hpp>
00010
00011
00012
00013 using namespace std;
00014 using namespace MosesTuning;
00015
00016 BOOST_AUTO_TEST_CASE(viterbi_simple_lattice)
00017 {
00018 Vocab vocab;
00019 WordVec words;
00020 string wordStrings[] =
00021 {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g"};
00022 for (size_t i = 0; i < 9; ++i) {
00023 words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
00024 }
00025
00026 const string f1 = "foo";
00027 const string f2 = "bar";
00028 Graph graph(vocab);
00029 graph.SetCounts(5,5);
00030
00031 Edge* e0 = graph.NewEdge();
00032 e0->AddWord(words[0]);
00033 e0->AddFeature(f1, 2.0);
00034
00035 Vertex* v0 = graph.NewVertex();
00036 v0->AddEdge(e0);
00037
00038 Edge* e1 = graph.NewEdge();
00039 e1->AddWord(NULL);
00040 e1->AddChild(0);
00041 e1->AddWord(words[2]);
00042 e1->AddWord(words[3]);
00043 e1->AddFeature(f1, 1.0);
00044 e1->AddFeature(f2, 3.0);
00045
00046 Vertex* v1 = graph.NewVertex();
00047 v1->AddEdge(e1);
00048
00049 Edge* e2 = graph.NewEdge();
00050 e2->AddWord(NULL);
00051 e2->AddChild(1);
00052 e2->AddWord(words[4]);
00053 e2->AddWord(words[5]);
00054 e2->AddFeature(f2, 2.5);
00055
00056 Vertex* v2 = graph.NewVertex();
00057 v2->AddEdge(e2);
00058
00059 Edge* e3 = graph.NewEdge();
00060 e3->AddWord(NULL);
00061 e3->AddChild(2);
00062 e3->AddWord(words[6]);
00063 e3->AddWord(words[7]);
00064 e3->AddWord(words[8]);
00065 e3->AddFeature(f1, -1);
00066
00067 Vertex* v3 = graph.NewVertex();
00068 v3->AddEdge(e3);
00069
00070 Edge* e4 = graph.NewEdge();
00071 e4->AddWord(NULL);
00072 e4->AddChild(3);
00073 e4->AddWord(words[1]);
00074 e3->AddFeature(f2, 0.5);
00075
00076 Vertex* v4 = graph.NewVertex();
00077 v4->AddEdge(e4);
00078
00079 ReferenceSet references;
00080 references.AddLine(0, "a b c k e f o", vocab);
00081 HgHypothesis modelHypo;
00082 vector<FeatureStatsType> bg(kBleuNgramOrder*2+1);
00083 SparseVector weights;
00084 weights.set(f1,2);
00085 weights.set(f2,1);
00086 Viterbi(graph, weights, 0, references, 0, bg, &modelHypo);
00087 BOOST_CHECK_CLOSE(2.0,modelHypo.featureVector.get(f1), 0.0001);
00088 BOOST_CHECK_CLOSE(6.0,modelHypo.featureVector.get(f2), 0.0001);
00089
00090 BOOST_CHECK_EQUAL(words[0]->first, modelHypo.text[0]->first);
00091 BOOST_CHECK_EQUAL(words[2]->first, modelHypo.text[1]->first);
00092 BOOST_CHECK_EQUAL(words[3]->first, modelHypo.text[2]->first);
00093 BOOST_CHECK_EQUAL(words[4]->first, modelHypo.text[3]->first);
00094 BOOST_CHECK_EQUAL(words[5]->first, modelHypo.text[4]->first);
00095 BOOST_CHECK_EQUAL(words[6]->first, modelHypo.text[5]->first);
00096 BOOST_CHECK_EQUAL(words[7]->first, modelHypo.text[6]->first);
00097 BOOST_CHECK_EQUAL(words[8]->first, modelHypo.text[7]->first);
00098 BOOST_CHECK_EQUAL(words[1]->first, modelHypo.text[8]->first);
00099 }
00100
00101
00102
00103 BOOST_AUTO_TEST_CASE(viterbi_3branch_lattice)
00104 {
00105 Vocab vocab;
00106 WordVec words;
00107 string wordStrings[] =
00108 {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
00109 for (size_t i = 0; i < 13; ++i) {
00110 words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
00111 }
00112
00113 const string f1 = "foo";
00114 const string f2 = "bar";
00115 Graph graph(vocab);
00116 graph.SetCounts(5,8);
00117
00118 Edge* e0 = graph.NewEdge();
00119 e0->AddWord(words[0]);
00120
00121 Vertex* v0 = graph.NewVertex();
00122 v0->AddEdge(e0);
00123
00124 Edge* e1 = graph.NewEdge();
00125 e1->AddWord(NULL);
00126 e1->AddChild(0);
00127 e1->AddWord(words[2]);
00128 e1->AddWord(words[3]);
00129 e1->AddFeature(f1,1);
00130 e1->AddFeature(f2,1);
00131 Edge* e5 = graph.NewEdge();
00132 e5->AddWord(NULL);
00133 e5->AddChild(0);
00134 e5->AddWord(words[9]);
00135 e5->AddWord(words[10]);
00136 e5->AddFeature(f1,2);
00137 e5->AddFeature(f2,-2);
00138
00139 Vertex* v1 = graph.NewVertex();
00140 v1->AddEdge(e1);
00141 v1->AddEdge(e5);
00142 v1->SetSourceCovered(1);
00143
00144 Edge* e2 = graph.NewEdge();
00145 e2->AddWord(NULL);
00146 e2->AddChild(1);
00147 e2->AddWord(words[4]);
00148 e2->AddWord(words[5]);
00149 e2->AddFeature(f2,3);
00150
00151 Vertex* v2 = graph.NewVertex();
00152 v2->AddEdge(e2);
00153 v2->SetSourceCovered(3);
00154
00155 Edge* e3 = graph.NewEdge();
00156 e3->AddWord(NULL);
00157 e3->AddChild(2);
00158 e3->AddWord(words[6]);
00159 e3->AddWord(words[7]);
00160 e3->AddWord(words[8]);
00161 e3->AddFeature(f1,1);
00162 Edge* e6 = graph.NewEdge();
00163 e6->AddWord(NULL);
00164 e6->AddChild(2);
00165 e6->AddWord(words[9]);
00166 e6->AddWord(words[12]);
00167 e6->AddFeature(f2,1);
00168 Edge* e7 = graph.NewEdge();
00169 e7->AddWord(NULL);
00170 e7->AddChild(1);
00171 e7->AddWord(words[11]);
00172 e7->AddWord(words[12]);
00173 e7->AddFeature(f1,2);
00174 e7->AddFeature(f2,3);
00175
00176 Vertex* v3 = graph.NewVertex();
00177 v3->AddEdge(e3);
00178 v3->AddEdge(e6);
00179 v3->AddEdge(e7);
00180 v3->SetSourceCovered(5);
00181
00182 Edge* e4 = graph.NewEdge();
00183 e4->AddWord(NULL);
00184 e4->AddChild(3);
00185 e4->AddWord(words[1]);
00186
00187 Vertex* v4 = graph.NewVertex();
00188 v4->AddEdge(e4);
00189 v4->SetSourceCovered(6);
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200 ReferenceSet references;
00201 references.AddLine(0, "a b c d h k", vocab);
00202 HgHypothesis modelHypo;
00203 vector<FeatureStatsType> bg(kBleuNgramOrder*2+1, 0.1);
00204 SparseVector weights;
00205 weights.set(f1,2);
00206 weights.set(f2,1);
00207 Viterbi(graph, weights, 0, references, 0, bg, &modelHypo);
00208 BOOST_CHECK_CLOSE(3.0,modelHypo.featureVector.get(f1), 0.0001);
00209 BOOST_CHECK_CLOSE(4.0,modelHypo.featureVector.get(f2), 0.0001);
00210
00211 BOOST_CHECK_EQUAL(6, modelHypo.text.size());
00212
00213
00214 BOOST_CHECK_EQUAL(words[0]->first, modelHypo.text[0]->first);
00215 BOOST_CHECK_EQUAL(words[2]->first, modelHypo.text[1]->first);
00216 BOOST_CHECK_EQUAL(words[3]->first, modelHypo.text[2]->first);
00217 BOOST_CHECK_EQUAL(words[11]->first, modelHypo.text[3]->first);
00218 BOOST_CHECK_EQUAL(words[12]->first, modelHypo.text[4]->first);
00219 BOOST_CHECK_EQUAL(words[1]->first, modelHypo.text[5]->first);
00220
00221
00222 HgHypothesis hopeHypo;
00223 Viterbi(graph, weights, 1, references, 0, bg, &hopeHypo);
00224
00225 BOOST_CHECK_EQUAL(8, hopeHypo.text.size());
00226
00227 BOOST_CHECK_EQUAL(words[0]->first, hopeHypo.text[0]->first);
00228 BOOST_CHECK_EQUAL(words[2]->first, hopeHypo.text[1]->first);
00229 BOOST_CHECK_EQUAL(words[3]->first, hopeHypo.text[2]->first);
00230 BOOST_CHECK_EQUAL(words[4]->first, hopeHypo.text[3]->first);
00231 BOOST_CHECK_EQUAL(words[5]->first, hopeHypo.text[4]->first);
00232 BOOST_CHECK_EQUAL(words[9]->first, hopeHypo.text[5]->first);
00233 BOOST_CHECK_EQUAL(words[12]->first, hopeHypo.text[6]->first);
00234 BOOST_CHECK_EQUAL(words[1]->first, hopeHypo.text[7]->first);
00235
00236 BOOST_CHECK_EQUAL(kBleuNgramOrder*2+1, hopeHypo.bleuStats.size());
00237 BOOST_CHECK_EQUAL(6, hopeHypo.bleuStats[0]);
00238 BOOST_CHECK_EQUAL(6, hopeHypo.bleuStats[1]);
00239 BOOST_CHECK_EQUAL(5, hopeHypo.bleuStats[2]);
00240 BOOST_CHECK_EQUAL(5, hopeHypo.bleuStats[3]);
00241 BOOST_CHECK_EQUAL(4, hopeHypo.bleuStats[4]);
00242 BOOST_CHECK_EQUAL(4, hopeHypo.bleuStats[5]);
00243 BOOST_CHECK_EQUAL(3, hopeHypo.bleuStats[6]);
00244 BOOST_CHECK_EQUAL(3, hopeHypo.bleuStats[7]);
00245 BOOST_CHECK_EQUAL(6, hopeHypo.bleuStats[8]);
00246 }
00247