00001 #pragma once 00002 00003 #include <string> 00004 #include <vector> 00005 00006 #include "syntax-common/tree_fragment_tokenizer.h" 00007 00008 #include "RuleSymbol.h" 00009 00010 namespace MosesTraining 00011 { 00012 namespace Syntax 00013 { 00014 namespace ScoreStsg 00015 { 00016 00017 // Stores one half of a STSG rule, as represented in the extract file. The 00018 // original string is stored as the member 'string', along with its token 00019 // sequence ('tokens') and frontier symbol sequence ('frontierSymbols'). Note 00020 // that 'tokens' and 'frontierSymbols' use StringPiece objects that depend on 00021 // the original string. Therefore changing the value of 'string' invalidates 00022 // both 'tokens' and 'frontierSymbols'. 00023 struct TokenizedRuleHalf { 00024 bool IsFullyLexical() const; 00025 bool IsString() const; 00026 bool IsTree() const; 00027 00028 // The rule half as it appears in the extract file, except with any trailing 00029 // or leading spaces removed (here a space is defined as a blank or a tab). 00030 std::string string; 00031 00032 // The token sequence for the string. 00033 std::vector<TreeFragmentToken> tokens; 00034 00035 // The frontier symbols of the rule half. For example: 00036 // 00037 // string: "[VP [VBN] [PP [IN] [NP [DT] [JJ positive] [NN light]]]]" 00038 // frontier: ("VBN",t), ("IN",t), ("DT",t), ("positive",f), ("light",f) 00039 // 00040 // string: "[X] [X] Sinne [X]" 00041 // frontier: ("X",t), ("X",t), ("Sinne",f), ("X",t) 00042 // 00043 std::vector<RuleSymbol> frontierSymbols; 00044 }; 00045 00046 } // namespace ScoreStsg 00047 } // namespace Syntax 00048 } // namespace MosesTraining