00001 #pragma once
00002
00003 #include <istream>
00004 #include <ostream>
00005 #include <string>
00006 #include <vector>
00007
00008 #include <boost/shared_ptr.hpp>
00009 #include <boost/unordered_map.hpp>
00010 #include <boost/unordered_set.hpp>
00011
00012 #include "syntax-common/numbered_set.h"
00013 #include "syntax-common/tree.h"
00014 #include "syntax-common/tree_fragment_tokenizer.h"
00015
00016 #include "Forest.h"
00017 #include "StringForest.h"
00018 #include "TsgFilter.h"
00019
00020 namespace MosesTraining
00021 {
00022 namespace Syntax
00023 {
00024 namespace FilterRuleTable
00025 {
00026
00027
00028
00029
00030 class ForestTsgFilter : public TsgFilter
00031 {
00032 public:
00033
00034 ForestTsgFilter(const std::vector<boost::shared_ptr<StringForest> > &);
00035
00036 private:
00037 struct IdForestValue {
00038 Vocabulary::IdType id;
00039 std::size_t start;
00040 std::size_t end;
00041 };
00042
00043 static const std::size_t kMatchLimit;
00044
00045
00046 typedef Forest<IdForestValue> IdForest;
00047
00048 typedef boost::unordered_map<std::size_t,
00049 std::vector<const IdForest::Vertex*> > InnerMap;
00050
00051 typedef std::vector<InnerMap> IdToSentenceMap;
00052
00053
00054 bool MatchFragment(const IdTree &, const std::vector<IdTree *> &);
00055
00056
00057 bool MatchFragment(const IdTree &, const IdForest::Vertex &);
00058
00059
00060
00061 boost::shared_ptr<IdForest> StringForestToIdForest(const StringForest &);
00062
00063 std::vector<boost::shared_ptr<IdForest> > m_sentences;
00064 IdToSentenceMap m_idToSentence;
00065 std::size_t m_matchCount;
00066 };
00067
00068 }
00069 }
00070 }