00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include "SentenceAlignmentWithSyntax.h"
00021
00022 #include <map>
00023 #include <set>
00024 #include <string>
00025
00026 #include "tables-core.h"
00027 #include "XmlException.h"
00028 #include "XmlTree.h"
00029 #include "util/tokenize.hh"
00030
00031 using namespace std;
00032
00033 namespace MosesTraining
00034 {
00035
00036 bool SentenceAlignmentWithSyntax::processTargetSentence(const char * targetString, int sentenceID, bool boundaryRules)
00037 {
00038 if (!m_targetSyntax) {
00039 return SentenceAlignment::processTargetSentence(targetString, sentenceID, boundaryRules);
00040 }
00041
00042 string targetStringCPP(targetString);
00043 try {
00044 ProcessAndStripXMLTags(targetStringCPP, targetTree,
00045 m_targetLabelCollection,
00046 m_targetTopLabelCollection,
00047 false);
00048 } catch (const XmlException & e) {
00049 std::cerr << "WARNING: failed to process target sentence at line "
00050 << sentenceID << ": " << e.getMsg() << std::endl;
00051 return false;
00052 }
00053 target = util::tokenize(targetStringCPP);
00054 return true;
00055 }
00056
00057 bool SentenceAlignmentWithSyntax::processSourceSentence(const char * sourceString, int sentenceID, bool boundaryRules)
00058 {
00059 if (!m_sourceSyntax) {
00060 return SentenceAlignment::processSourceSentence(sourceString, sentenceID, boundaryRules);
00061 }
00062
00063 string sourceStringCPP(sourceString);
00064 try {
00065 ProcessAndStripXMLTags(sourceStringCPP, sourceTree,
00066 m_sourceLabelCollection ,
00067 m_sourceTopLabelCollection,
00068 false);
00069 } catch (const XmlException & e) {
00070 std::cerr << "WARNING: failed to process source sentence at line "
00071 << sentenceID << ": " << e.getMsg() << std::endl;
00072 return false;
00073 }
00074 source = util::tokenize(sourceStringCPP);
00075 return true;
00076 }
00077
00078 }