00001 #include "BleuScorer.h"
00002
00003 #define BOOST_TEST_MODULE MertBleuScorer
00004 #include <boost/test/unit_test.hpp>
00005
00006 #include <cmath>
00007 #include "Ngram.h"
00008 #include "Vocabulary.h"
00009 #include "Util.h"
00010
00011 using namespace MosesTuning;
00012
00013 namespace
00014 {
00015
00016 NgramCounts* g_counts = NULL;
00017
00018 NgramCounts* GetNgramCounts()
00019 {
00020 assert(g_counts);
00021 return g_counts;
00022 }
00023
00024 void SetNgramCounts(NgramCounts* counts)
00025 {
00026 g_counts = counts;
00027 }
00028
00029 struct Unigram {
00030 Unigram(const std::string& a) {
00031 instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(a));
00032 }
00033 NgramCounts::Key instance;
00034 };
00035
00036 struct Bigram {
00037 Bigram(const std::string& a, const std::string& b) {
00038 instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(a));
00039 instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(b));
00040 }
00041 NgramCounts::Key instance;
00042 };
00043
00044 struct Trigram {
00045 Trigram(const std::string& a, const std::string& b, const std::string& c) {
00046 instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(a));
00047 instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(b));
00048 instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(c));
00049 }
00050 NgramCounts::Key instance;
00051 };
00052
00053 struct Fourgram {
00054 Fourgram(const std::string& a, const std::string& b,
00055 const std::string& c, const std::string& d) {
00056 instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(a));
00057 instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(b));
00058 instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(c));
00059 instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(d));
00060 }
00061 NgramCounts::Key instance;
00062 };
00063
00064 bool CheckUnigram(const std::string& str)
00065 {
00066 Unigram unigram(str);
00067 NgramCounts::Value v;
00068 return GetNgramCounts()->Lookup(unigram.instance, &v);
00069 }
00070
00071 bool CheckBigram(const std::string& a, const std::string& b)
00072 {
00073 Bigram bigram(a, b);
00074 NgramCounts::Value v;
00075 return GetNgramCounts()->Lookup(bigram.instance, &v);
00076 }
00077
00078 bool CheckTrigram(const std::string& a, const std::string& b,
00079 const std::string& c)
00080 {
00081 Trigram trigram(a, b, c);
00082 NgramCounts::Value v;
00083 return GetNgramCounts()->Lookup(trigram.instance, &v);
00084 }
00085
00086 bool CheckFourgram(const std::string& a, const std::string& b,
00087 const std::string& c, const std::string& d)
00088 {
00089 Fourgram fourgram(a, b, c, d);
00090 NgramCounts::Value v;
00091 return GetNgramCounts()->Lookup(fourgram.instance, &v);
00092 }
00093
00094 void SetUpReferences(BleuScorer& scorer)
00095 {
00096
00097
00098 {
00099 std::stringstream ref1;
00100 ref1 << "israeli officials are responsible for airport security" << std::endl;
00101 BOOST_CHECK(scorer.OpenReferenceStream(&ref1, 0));
00102 }
00103
00104 {
00105 std::stringstream ref2;
00106 ref2 << "israel is in charge of the security at this airport" << std::endl;
00107 BOOST_CHECK(scorer.OpenReferenceStream(&ref2, 1));
00108 }
00109
00110 {
00111 std::stringstream ref3;
00112 ref3 << "the security work for this airport is the responsibility of the israel government"
00113 << std::endl;
00114 BOOST_CHECK(scorer.OpenReferenceStream(&ref3, 2));
00115 }
00116
00117 {
00118 std::stringstream ref4;
00119 ref4 << "israli side was in charge of the security of this airport" << std::endl;
00120 BOOST_CHECK(scorer.OpenReferenceStream(&ref4, 3));
00121 }
00122 }
00123
00124 }
00125
00126 BOOST_AUTO_TEST_CASE(bleu_reference_type)
00127 {
00128 BleuScorer scorer;
00129
00130 BOOST_CHECK_EQUAL(scorer.GetReferenceLengthType(), BleuScorer::CLOSEST);
00131
00132 scorer.SetReferenceLengthType(BleuScorer::AVERAGE);
00133 BOOST_CHECK_EQUAL(scorer.GetReferenceLengthType(), BleuScorer::AVERAGE);
00134
00135 scorer.SetReferenceLengthType(BleuScorer::SHORTEST);
00136 BOOST_CHECK_EQUAL(scorer.GetReferenceLengthType(), BleuScorer::SHORTEST);
00137 }
00138
00139 BOOST_AUTO_TEST_CASE(bleu_reference_type_with_config)
00140 {
00141 {
00142 BleuScorer scorer("reflen:average");
00143 BOOST_CHECK_EQUAL(scorer.GetReferenceLengthType(), BleuScorer::AVERAGE);
00144 }
00145
00146 {
00147 BleuScorer scorer("reflen:shortest");
00148 BOOST_CHECK_EQUAL(scorer.GetReferenceLengthType(), BleuScorer::SHORTEST);
00149 }
00150 }
00151
00152 BOOST_AUTO_TEST_CASE(bleu_count_ngrams)
00153 {
00154 BleuScorer scorer;
00155
00156 std::string line = "I saw a girl with a telescope .";
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166 NgramCounts counts;
00167 BOOST_REQUIRE(scorer.CountNgrams(line, counts, kBleuNgramOrder) == 8);
00168 BOOST_CHECK_EQUAL((std::size_t)25, counts.size());
00169
00170 mert::Vocabulary* vocab = scorer.GetVocab();
00171 BOOST_CHECK_EQUAL((std::size_t)7, vocab->size());
00172
00173 std::vector<std::string> res;
00174 Tokenize(line.c_str(), ' ', &res);
00175 std::vector<int> ids(res.size());
00176 for (std::size_t i = 0; i < res.size(); ++i) {
00177 BOOST_CHECK(vocab->Lookup(res[i], &ids[i]));
00178 }
00179
00180 SetNgramCounts(&counts);
00181
00182
00183 for (std::size_t i = 0; i < res.size(); ++i) {
00184 BOOST_CHECK(CheckUnigram(res[i]));
00185 }
00186
00187
00188 BOOST_CHECK(CheckBigram("I", "saw"));
00189 BOOST_CHECK(CheckBigram("saw", "a"));
00190 BOOST_CHECK(CheckBigram("a", "girl"));
00191 BOOST_CHECK(CheckBigram("girl", "with"));
00192 BOOST_CHECK(CheckBigram("with", "a"));
00193 BOOST_CHECK(CheckBigram("a", "telescope"));
00194 BOOST_CHECK(CheckBigram("telescope", "."));
00195
00196
00197 BOOST_CHECK(CheckTrigram("I", "saw", "a"));
00198 BOOST_CHECK(CheckTrigram("saw", "a", "girl"));
00199 BOOST_CHECK(CheckTrigram("a", "girl", "with"));
00200 BOOST_CHECK(CheckTrigram("girl", "with", "a"));
00201 BOOST_CHECK(CheckTrigram("with", "a", "telescope"));
00202 BOOST_CHECK(CheckTrigram("a", "telescope", "."));
00203
00204
00205 BOOST_CHECK(CheckFourgram("I", "saw", "a", "girl"));
00206 BOOST_CHECK(CheckFourgram("saw", "a", "girl", "with"));
00207 BOOST_CHECK(CheckFourgram("a", "girl", "with", "a"));
00208 BOOST_CHECK(CheckFourgram("girl", "with", "a", "telescope"));
00209 BOOST_CHECK(CheckFourgram("with", "a", "telescope", "."));
00210 }
00211
00212 BOOST_AUTO_TEST_CASE(bleu_clipped_counts)
00213 {
00214 BleuScorer scorer;
00215 SetUpReferences(scorer);
00216 std::string line("israeli officials responsibility of airport safety");
00217 ScoreStats entry;
00218 scorer.prepareStats(0, line, entry);
00219
00220 BOOST_CHECK_EQUAL(entry.size(), (std::size_t)(2 * kBleuNgramOrder + 1));
00221
00222
00223 BOOST_CHECK_EQUAL(entry.get(0), 5);
00224 BOOST_CHECK_EQUAL(entry.get(2), 2);
00225 BOOST_CHECK_EQUAL(entry.get(4), 0);
00226 BOOST_CHECK_EQUAL(entry.get(6), 0);
00227
00228
00229 BOOST_CHECK_EQUAL(entry.get(1), 6);
00230 BOOST_CHECK_EQUAL(entry.get(3), 5);
00231 BOOST_CHECK_EQUAL(entry.get(5), 4);
00232 BOOST_CHECK_EQUAL(entry.get(7), 3);
00233 }
00234
00235 BOOST_AUTO_TEST_CASE(calculate_actual_score)
00236 {
00237 BOOST_REQUIRE(4 == kBleuNgramOrder);
00238 std::vector<ScoreStatsType> stats(2 * kBleuNgramOrder + 1);
00239 BleuScorer scorer;
00240
00241
00242 stats[0] = 6;
00243 stats[1] = 6;
00244
00245
00246 stats[2] = 4;
00247 stats[3] = 5;
00248
00249
00250 stats[4] = 2;
00251 stats[5] = 4;
00252
00253
00254 stats[6] = 1;
00255 stats[7] = 3;
00256
00257
00258 stats[8] = 7;
00259
00260 BOOST_CHECK_CLOSE(0.5115f, scorer.calculateScore(stats), 0.01);
00261 }
00262
00263 BOOST_AUTO_TEST_CASE(sentence_level_bleu)
00264 {
00265 BOOST_REQUIRE(4 == kBleuNgramOrder);
00266 std::vector<float> stats(2 * kBleuNgramOrder + 1);
00267
00268
00269 stats[0] = 6.0;
00270 stats[1] = 6.0;
00271
00272
00273 stats[2] = 4.0;
00274 stats[3] = 5.0;
00275
00276
00277 stats[4] = 2.0;
00278 stats[5] = 4.0;
00279
00280
00281 stats[6] = 1.0;
00282 stats[7] = 3.0;
00283
00284
00285 stats[8] = 7.0;
00286
00287 BOOST_CHECK_CLOSE(0.5985f, smoothedSentenceBleu(stats), 0.01);
00288 BOOST_CHECK_CLOSE(0.5624f, smoothedSentenceBleu(stats, 0.5), 0.01 );
00289 BOOST_CHECK_CLOSE(0.5067f, smoothedSentenceBleu(stats, 1.0, true), 0.01);
00290 }