00001 #include "tree_fragment_tokenizer.h"
00002
00003 #define BOOST_TEST_MODULE TreeTest
00004 #include <boost/test/unit_test.hpp>
00005
00006 #include <boost/scoped_ptr.hpp>
00007
00008 namespace MosesTraining {
00009 namespace Syntax {
00010 namespace {
00011
00012 BOOST_AUTO_TEST_CASE(tokenize_empty) {
00013 const std::string fragment = "";
00014 std::vector<TreeFragmentToken> tokens;
00015 for (TreeFragmentTokenizer p(fragment); p != TreeFragmentTokenizer(); ++p) {
00016 tokens.push_back(*p);
00017 }
00018 BOOST_REQUIRE(tokens.empty());
00019 }
00020
00021 BOOST_AUTO_TEST_CASE(tokenize_space) {
00022 const std::string fragment = " [ weasel weasel ] [] ] wea[sel";
00023 std::vector<TreeFragmentToken> tokens;
00024 for (TreeFragmentTokenizer p(fragment); p != TreeFragmentTokenizer(); ++p) {
00025 tokens.push_back(*p);
00026 }
00027 BOOST_REQUIRE(tokens.size() == 10);
00028 BOOST_REQUIRE(tokens[0].type == TreeFragmentToken_LSB);
00029 BOOST_REQUIRE(tokens[0].value == "[");
00030 BOOST_REQUIRE(tokens[1].type == TreeFragmentToken_WORD);
00031 BOOST_REQUIRE(tokens[1].value == "weasel");
00032 BOOST_REQUIRE(tokens[2].type == TreeFragmentToken_WORD);
00033 BOOST_REQUIRE(tokens[2].value == "weasel");
00034 BOOST_REQUIRE(tokens[3].type == TreeFragmentToken_RSB);
00035 BOOST_REQUIRE(tokens[3].value == "]");
00036 BOOST_REQUIRE(tokens[4].type == TreeFragmentToken_LSB);
00037 BOOST_REQUIRE(tokens[4].value == "[");
00038 BOOST_REQUIRE(tokens[5].type == TreeFragmentToken_RSB);
00039 BOOST_REQUIRE(tokens[5].value == "]");
00040 BOOST_REQUIRE(tokens[6].type == TreeFragmentToken_RSB);
00041 BOOST_REQUIRE(tokens[6].value == "]");
00042 BOOST_REQUIRE(tokens[7].type == TreeFragmentToken_WORD);
00043 BOOST_REQUIRE(tokens[7].value == "wea");
00044 BOOST_REQUIRE(tokens[8].type == TreeFragmentToken_LSB);
00045 BOOST_REQUIRE(tokens[8].value == "[");
00046 BOOST_REQUIRE(tokens[9].type == TreeFragmentToken_WORD);
00047 BOOST_REQUIRE(tokens[9].value == "sel");
00048 }
00049
00050 BOOST_AUTO_TEST_CASE(tokenize_fragment) {
00051 const std::string fragment = "[S [NP [NN weasels]] [VP]]";
00052 std::vector<TreeFragmentToken> tokens;
00053 for (TreeFragmentTokenizer p(fragment); p != TreeFragmentTokenizer(); ++p) {
00054 tokens.push_back(*p);
00055 }
00056 BOOST_REQUIRE(tokens.size() == 13);
00057 BOOST_REQUIRE(tokens[0].type == TreeFragmentToken_LSB);
00058 BOOST_REQUIRE(tokens[1].type == TreeFragmentToken_WORD);
00059 BOOST_REQUIRE(tokens[2].type == TreeFragmentToken_LSB);
00060 BOOST_REQUIRE(tokens[3].type == TreeFragmentToken_WORD);
00061 BOOST_REQUIRE(tokens[4].type == TreeFragmentToken_LSB);
00062 BOOST_REQUIRE(tokens[5].type == TreeFragmentToken_WORD);
00063 BOOST_REQUIRE(tokens[6].type == TreeFragmentToken_WORD);
00064 BOOST_REQUIRE(tokens[7].type == TreeFragmentToken_RSB);
00065 BOOST_REQUIRE(tokens[8].type == TreeFragmentToken_RSB);
00066 BOOST_REQUIRE(tokens[9].type == TreeFragmentToken_LSB);
00067 BOOST_REQUIRE(tokens[10].type == TreeFragmentToken_WORD);
00068 BOOST_REQUIRE(tokens[11].type == TreeFragmentToken_RSB);
00069 BOOST_REQUIRE(tokens[12].type == TreeFragmentToken_RSB);
00070 }
00071
00072 }
00073 }
00074 }