00001 #include "lm/partial.hh"
00002
00003 #include "lm/left.hh"
00004 #include "lm/model.hh"
00005 #include "util/tokenize_piece.hh"
00006
00007 #define BOOST_TEST_MODULE PartialTest
00008 #include <boost/test/unit_test.hpp>
00009 #include <boost/test/floating_point_comparison.hpp>
00010
00011 namespace lm {
00012 namespace ngram {
00013 namespace {
00014
00015 const char *TestLocation() {
00016 if (boost::unit_test::framework::master_test_suite().argc < 2) {
00017 return "test.arpa";
00018 }
00019 return boost::unit_test::framework::master_test_suite().argv[1];
00020 }
00021
00022 Config SilentConfig() {
00023 Config config;
00024 config.arpa_complain = Config::NONE;
00025 config.messages = NULL;
00026 return config;
00027 }
00028
00029 struct ModelFixture {
00030 ModelFixture() : m(TestLocation(), SilentConfig()) {}
00031
00032 RestProbingModel m;
00033 };
00034
00035 BOOST_FIXTURE_TEST_SUITE(suite, ModelFixture)
00036
00037 BOOST_AUTO_TEST_CASE(SimpleBefore) {
00038 Left left;
00039 left.full = false;
00040 left.length = 0;
00041 Right right;
00042 right.length = 0;
00043
00044 Right reveal;
00045 reveal.length = 1;
00046 WordIndex period = m.GetVocabulary().Index(".");
00047 reveal.words[0] = period;
00048 reveal.backoff[0] = -0.845098;
00049
00050 BOOST_CHECK_CLOSE(0.0, RevealBefore(m, reveal, 0, false, left, right), 0.001);
00051 BOOST_CHECK_EQUAL(0, left.length);
00052 BOOST_CHECK(!left.full);
00053 BOOST_CHECK_EQUAL(1, right.length);
00054 BOOST_CHECK_EQUAL(period, right.words[0]);
00055 BOOST_CHECK_CLOSE(-0.845098, right.backoff[0], 0.001);
00056
00057 WordIndex more = m.GetVocabulary().Index("more");
00058 reveal.words[1] = more;
00059 reveal.backoff[1] = -0.4771212;
00060 reveal.length = 2;
00061 BOOST_CHECK_CLOSE(0.0, RevealBefore(m, reveal, 1, false, left, right), 0.001);
00062 BOOST_CHECK_EQUAL(0, left.length);
00063 BOOST_CHECK(!left.full);
00064 BOOST_CHECK_EQUAL(2, right.length);
00065 BOOST_CHECK_EQUAL(period, right.words[0]);
00066 BOOST_CHECK_EQUAL(more, right.words[1]);
00067 BOOST_CHECK_CLOSE(-0.845098, right.backoff[0], 0.001);
00068 BOOST_CHECK_CLOSE(-0.4771212, right.backoff[1], 0.001);
00069 }
00070
00071 BOOST_AUTO_TEST_CASE(AlsoWouldConsider) {
00072 WordIndex would = m.GetVocabulary().Index("would");
00073 WordIndex consider = m.GetVocabulary().Index("consider");
00074
00075 ChartState current;
00076 current.left.length = 1;
00077 current.left.pointers[0] = would;
00078 current.left.full = false;
00079 current.right.length = 1;
00080 current.right.words[0] = would;
00081 current.right.backoff[0] = -0.30103;
00082
00083 Left after;
00084 after.full = false;
00085 after.length = 1;
00086 after.pointers[0] = consider;
00087
00088
00089 BOOST_CHECK_CLOSE(-1.687872 - -0.2922095 - 0.30103, RevealAfter(m, current.left, current.right, after, 0), 0.001);
00090
00091 BOOST_CHECK_EQUAL(2, current.left.length);
00092 BOOST_CHECK_EQUAL(would, current.left.pointers[0]);
00093 BOOST_CHECK_EQUAL(false, current.left.full);
00094
00095 WordIndex also = m.GetVocabulary().Index("also");
00096 Right before;
00097 before.length = 1;
00098 before.words[0] = also;
00099 before.backoff[0] = -0.30103;
00100
00101
00102 BOOST_CHECK_CLOSE(-2 + 0.2922095 -3 + 1.988902, RevealBefore(m, before, 0, false, current.left, current.right), 0.001);
00103 BOOST_CHECK_EQUAL(0, current.left.length);
00104 BOOST_CHECK(current.left.full);
00105 BOOST_CHECK_EQUAL(2, current.right.length);
00106 BOOST_CHECK_EQUAL(would, current.right.words[0]);
00107 BOOST_CHECK_EQUAL(also, current.right.words[1]);
00108 }
00109
00110 BOOST_AUTO_TEST_CASE(EndSentence) {
00111 WordIndex loin = m.GetVocabulary().Index("loin");
00112 WordIndex period = m.GetVocabulary().Index(".");
00113 WordIndex eos = m.GetVocabulary().EndSentence();
00114
00115 ChartState between;
00116 between.left.length = 1;
00117 between.left.pointers[0] = eos;
00118 between.left.full = true;
00119 between.right.length = 0;
00120
00121 Right before;
00122 before.words[0] = period;
00123 before.words[1] = loin;
00124 before.backoff[0] = -0.845098;
00125 before.backoff[1] = 0.0;
00126
00127 before.length = 1;
00128 BOOST_CHECK_CLOSE(-0.0410707, RevealBefore(m, before, 0, true, between.left, between.right), 0.001);
00129 BOOST_CHECK_EQUAL(0, between.left.length);
00130 }
00131
00132 float ScoreFragment(const RestProbingModel &model, unsigned int *begin, unsigned int *end, ChartState &out) {
00133 RuleScore<RestProbingModel> scorer(model, out);
00134 for (unsigned int *i = begin; i < end; ++i) {
00135 scorer.Terminal(*i);
00136 }
00137 return scorer.Finish();
00138 }
00139
00140 void CheckAdjustment(const RestProbingModel &model, float expect, const Right &before_in, bool before_full, ChartState between, const Left &after_in) {
00141 Right before(before_in);
00142 Left after(after_in);
00143 after.full = false;
00144 float got = 0.0;
00145 for (unsigned int i = 1; i < 5; ++i) {
00146 if (before_in.length >= i) {
00147 before.length = i;
00148 got += RevealBefore(model, before, i - 1, false, between.left, between.right);
00149 }
00150 if (after_in.length >= i) {
00151 after.length = i;
00152 got += RevealAfter(model, between.left, between.right, after, i - 1);
00153 }
00154 }
00155 if (after_in.full) {
00156 after.full = true;
00157 got += RevealAfter(model, between.left, between.right, after, after.length);
00158 }
00159 if (before_full) {
00160 got += RevealBefore(model, before, before.length, true, between.left, between.right);
00161 }
00162
00163 BOOST_CHECK(fabs(expect - got) < 0.001);
00164 }
00165
00166 void FullDivide(const RestProbingModel &model, StringPiece str) {
00167 std::vector<WordIndex> indices;
00168 for (util::TokenIter<util::SingleCharacter, true> i(str, ' '); i; ++i) {
00169 indices.push_back(model.GetVocabulary().Index(*i));
00170 }
00171 ChartState full_state;
00172 float full = ScoreFragment(model, &indices.front(), &indices.back() + 1, full_state);
00173
00174 ChartState before_state;
00175 before_state.left.full = false;
00176 RuleScore<RestProbingModel> before_scorer(model, before_state);
00177 float before_score = 0.0;
00178 for (unsigned int before = 0; before < indices.size(); ++before) {
00179 for (unsigned int after = before; after <= indices.size(); ++after) {
00180 ChartState after_state, between_state;
00181 float after_score = ScoreFragment(model, &indices.front() + after, &indices.front() + indices.size(), after_state);
00182 float between_score = ScoreFragment(model, &indices.front() + before, &indices.front() + after, between_state);
00183 CheckAdjustment(model, full - before_score - after_score - between_score, before_state.right, before_state.left.full, between_state, after_state.left);
00184 }
00185 before_scorer.Terminal(indices[before]);
00186 before_score = before_scorer.Finish();
00187 }
00188 }
00189
00190 BOOST_AUTO_TEST_CASE(Strings) {
00191 FullDivide(m, "also would consider");
00192 FullDivide(m, "looking on a little more loin . </s>");
00193 FullDivide(m, "in biarritz watching considering looking . on a little more loin also would consider higher to look good unknown the screening foo bar , unknown however unknown </s>");
00194 }
00195
00196 BOOST_AUTO_TEST_SUITE_END()
00197 }
00198 }
00199 }