00001 #include <iostream>
00002 #include <string>
00003
00004 #ifdef WITH_THREADS
00005 #include <boost/thread/thread.hpp>
00006 #endif
00007
00008 #include "moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.h"
00009
00010 #include "util/file.hh"
00011
00012 using namespace Moses;
00013
/**
 * Writes the usage text to standard error: the supported options, a link
 * to further documentation, the citation for the method and the
 * acknowledgments.
 *
 * @param argv  program argument vector; only argv[0] is read, and it is
 *              echoed in the first line of the message.
 */
void printHelp(char **argv)
{
  std::ostream &err = std::cerr;

  // Basic options.
  err << "Usage " << argv[0] << ":\n";
  err << " options: \n";
  err << "\t-in string -- input table file name\n";
  err << "\t-out string -- prefix of binary table file\n";
  err << "\t-T string -- path to temporary directory (uses /tmp by default)\n";
#ifdef WITH_THREADS
  // Only advertised when the binary was built with thread support.
  err << "\t-threads int|all -- number of threads used for conversion\n";
#endif

  // Advanced options.
  err << "\n advanced:\n";
  err << "\t-landmark int -- use landmark phrase every 2^n phrases\n";
  err << "\t-fingerprint int -- number of bits used for phrase fingerprints\n";
  err << "\t-join-scores -- single set of Huffman codes for score components\n";
  err << "\t-quantize int -- maximum number of scores per score component\n";
  err << "\n";

  // Pointer to the online documentation.
  err << " For more information see: http://www.statmt.org/moses/?n=Moses.AdvancedFeatures#ntoc6\n\n";

  // Citation request (BibTeX entry).
  err << " If you use this please cite:\n\n";
  err << " @article { junczys_pbml98_2012,\n";
  err << " author = { Marcin Junczys-Dowmunt },\n";
  err << " title = { Phrasal Rank-Encoding: Exploiting Phrase Redundancy and\n";
  err << " Translational Relations for Phrase Table Compression },\n";
  err << " journal = { The Prague Bulletin of Mathematical Linguistics },\n";
  err << " volume = { 98 },\n";
  err << " year = { 2012 },\n";
  err << " note = { Proceedings of the MT Marathon 2012, Edinburgh },\n";
  err << " }\n\n";

  // Acknowledgments.
  err << " Acknowledgments: Part of this research was carried out at and funded by\n";
  err << " the World Intellectual Property Organization (WIPO) in Geneva.\n\n";
}
00044
00045 int main(int argc, char** argv)
00046 {
00047
00048 std::string inFilePath;
00049 std::string outFilePath("out");
00050 std::string tempfilePath;
00051
00052 size_t orderBits = 10;
00053 size_t fingerPrintBits = 16;
00054 bool multipleScoreTrees = true;
00055 size_t quantize = 0;
00056
00057 size_t threads =
00058 #ifdef WITH_THREADS
00059 boost::thread::hardware_concurrency() ? boost::thread::hardware_concurrency() :
00060 #endif
00061 1;
00062
00063 if(1 >= argc) {
00064 printHelp(argv);
00065 return 1;
00066 }
00067 for(int i = 1; i < argc; ++i) {
00068 std::string arg(argv[i]);
00069 if("-in" == arg && i+1 < argc) {
00070 ++i;
00071 inFilePath = argv[i];
00072 } else if("-out" == arg && i+1 < argc) {
00073 ++i;
00074 outFilePath = argv[i];
00075 } else if("-T" == arg && i+1 < argc) {
00076 ++i;
00077 tempfilePath = argv[i];
00078 util::NormalizeTempPrefix(tempfilePath);
00079 } else if("-landmark" == arg && i+1 < argc) {
00080 ++i;
00081 orderBits = atoi(argv[i]);
00082 } else if("-fingerprint" == arg && i+1 < argc) {
00083 ++i;
00084 fingerPrintBits = atoi(argv[i]);
00085 } else if("-join-scores" == arg) {
00086 multipleScoreTrees = false;
00087 } else if("-quantize" == arg && i+1 < argc) {
00088 ++i;
00089 quantize = atoi(argv[i]);
00090 } else if("-threads" == arg && i+1 < argc) {
00091 #ifdef WITH_THREADS
00092 ++i;
00093 if(std::string(argv[i]) == "all") {
00094 threads = boost::thread::hardware_concurrency();
00095 if(!threads) {
00096 std::cerr << "Could not determine number of hardware threads, setting to 1" << std::endl;
00097 threads = 1;
00098 }
00099 } else
00100 threads = atoi(argv[i]);
00101 #else
00102 std::cerr << "Thread support not compiled in" << std::endl;
00103 exit(1);
00104 #endif
00105 } else {
00106
00107 printHelp(argv);
00108 return 1;
00109 }
00110 }
00111
00112 if(outFilePath.rfind(".minlexr") != outFilePath.size() - 8)
00113 outFilePath += ".minlexr";
00114
00115 LexicalReorderingTableCreator(
00116 inFilePath, outFilePath, tempfilePath,
00117 orderBits, fingerPrintBits,
00118 multipleScoreTrees, quantize
00119 #ifdef WITH_THREADS
00120 , threads
00121 #endif
00122 );
00123 }