Moses: /disk4/html/www/moses/doxygen/mosesdecoder/phrase-extract/filter-rule-table/TsgFilter.h Source File

00001 #pragma once
00002 
00003 #include <istream>
00004 #include <ostream>
00005 #include <string>
00006 #include <vector>
00007 
00008 #include "syntax-common/numbered_set.h"
00009 #include "syntax-common/tree.h"
00010 #include "syntax-common/tree_fragment_tokenizer.h"
00011 
00012 namespace MosesTraining
00013 {
00014 namespace Syntax
00015 {
00016 namespace FilterRuleTable
00017 {
00018 
00019 // Abstract base class for TreeTsgFilter and ForestTsgFilter, both of which filter
00020 // rule tables where the source-side is TSG.  Subclasses supply MatchFragment().
00021 class TsgFilter
00022 {
00023 public:
00024   virtual ~TsgFilter() {}  // Virtual because this class is meant to be derived from.
00025 
00026   // Read a rule table from 'in' and filter it according to the test sentences.
00027   void Filter(std::istream &in, std::ostream &out);  // NOTE(review): 'out' presumably receives the retained rules -- confirm in the definition.
00028 
00029 protected:
00030   // Symbol table mapping symbols (terminals and non-terminals) from strings to integer ids.
00031   typedef NumberedSet<std::string, std::size_t> Vocabulary;
00032 
00033   // A tree whose node values are Vocabulary ids (Vocabulary::IdType) rather than strings.
00034   typedef Tree<Vocabulary::IdType> IdTree;
00035 
00036   // Build an IdTree (wrt m_testVocab) for the tree fragment that begins at position
00037   // i of 'tokens'.  Returns 0 if any symbol in the fragment is not in m_testVocab.
00038   // On success, i is set to the position immediately after the last token of the
00039   // tree and 'leaves' will contain the pointers to the fragment's leaves.  On failure,
00040   // i and 'leaves' are undefined.  NOTE(review): ownership of the returned tree is
00041   // not stated here -- presumably the caller is responsible for it; confirm.
00042   IdTree *BuildTree(const std::vector<TreeFragmentToken> &tokens, int &i,
00043                     std::vector<IdTree *> &leaves);
00044 
00045   // Try to match a fragment.  Pure virtual: the implementation depends on whether
00046   // the test sentences are trees or forests, so each subclass must provide it.
00047   virtual bool MatchFragment(const IdTree &, const std::vector<IdTree *> &) = 0;  // Presumably (fragment, its leaves) as produced by BuildTree -- TODO confirm.
00048 
00049   // The symbol vocabulary of the test sentences; BuildTree resolves symbols against it.
00050   Vocabulary m_testVocab;
00051 };
00052 
00053 }  // namespace FilterRuleTable
00054 }  // namespace Syntax
00055 }  // namespace MosesTraining