From f55d4a88dbd90d9f58eba9fe293ad615c066fb15 Mon Sep 17 00:00:00 2001
From: Kashif Rasul
Date: Mon, 15 May 2017 11:07:42 +0200
Subject: [PATCH] added initial support to compile on OSX

---
 .gitignore | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 README.md  | 11 ++++++++---
 autogen.sh |  4 +++-
 3 files changed, 60 insertions(+), 4 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..fa75ffb
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,49 @@
+Makefile
+Makefile.in
+/ar-lib
+/mdate-sh
+/py-compile
+/test-driver
+/ylwrap
+
+/autom4te.cache
+/autoscan.log
+/autoscan-*.log
+/aclocal.m4
+/compile
+/config.guess
+/config.h.in
+/config.sub
+/configure
+/configure.scan
+/depcomp
+/install-sh
+/missing
+/stamp-h1
+/libtool
+/config.h
+/config.status
+/autogen.sh
+/ltmain.sh
+
+*.o
+*.lo
+*.a
+*.la
+
+.libs
+.deps
+
+*.m4
+*.log
+
+compile_charsmap
+
+spm_decode
+spm_encode
+spm_export_vocab
+spm_train
+spm_normalize
+
+*.pb.cc
+*.pb.h
diff --git a/README.md b/README.md
index 3958e57..2eee641 100644
--- a/README.md
+++ b/README.md
@@ -80,6 +80,11 @@ On Ubuntu, autotools and protobuf library can be install with apt-get:
 ```
 (If `libprotobuf9v5` is not found, try `libprotobuf-c++` instead.)
 
+On OSX, you can use brew:
+```
+% brew install protobuf
+```
+
 ## Build and Install SentencePiece
 ```
 % cd /path/to/sentencepiece
@@ -131,7 +136,7 @@ Use `--extra_options` flag to decode the text in reverse order.
 ## End-to-End Example
 ```
 % spm_train --input=data/botchan.txt --model_prefix=m --vocab_size=1000
-unigram_model_trainer.cc(494) LOG(INFO) Starts training with : 
+unigram_model_trainer.cc(494) LOG(INFO) Starts training with :
 input: "../data/botchan.txt"
 ...
 unigram_model_trainer.cc(529) LOG(INFO) EM sub_iter=1 size=1100 obj=10.4973 num_tokens=37630 num_tokens/piece=34.2091
@@ -167,7 +172,7 @@ You can find that the original input sentence is restored from the vocabulary id
 * **neologd**: [MeCab with neologd](https://github.com/neologd/mecab-ipadic-neologd) for Japanese.
 * **(Moses/KyTea)+SentencePiece**: Apply SentencePiece (Unigram) to pre-tokenized sentences. We have several variants with different tokenizers., e.g., **(Moses/MeCab)+SentencePiece**, **(MeCab/Moses)+SentencePiece**.
 * *char**: Segments sentence by characters.
-  
+
 
 * Data sets:
   * [KFTT](http://www.phontron.com/kftt/index.html)
@@ -180,7 +185,7 @@ You can find that the original input sentence is restored from the vocabulary id
 * Evaluation metrics:
   * Case-sensitive BLEU on detokenized text with NIST scorer and KyTea segmenter. Used in-house rule-based detokenizer for Moses/KyTea/MeCab/neologd.
-  
+
 
 ### Results (BLEU scores)
 #### English to Japanese
 |Setting|vocab size|BLEU(dev)|BLEU(test)|src #tokens/sent.|trg #tokens/sent.|
diff --git a/autogen.sh b/autogen.sh
index d06792e..1e6d221 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -18,7 +18,9 @@ aclocal -I .
 echo "Running autoheader..."
 autoheader
 echo "Running libtoolize .."
-libtoolize
+case `uname` in Darwin*) glibtoolize ;;
+  *) libtoolize ;;
+esac
 echo "Running automake ..."
 automake --add-missing --copy
 echo "Running autoconf ..."
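The `autogen.sh` hunk above selects the libtoolize binary by platform: on macOS, Homebrew installs GNU libtool's tools with a `g` prefix (`glibtoolize`) to avoid clashing with Apple's own `libtool`. A minimal standalone sketch of the same check (the `LIBTOOLIZE` variable name is illustrative, not part of the patch):

```shell
#!/bin/sh
# Pick the GNU libtoolize binary for the current platform.
# On Darwin (macOS), Homebrew names it "glibtoolize"; elsewhere
# it is plain "libtoolize".
case `uname` in
  Darwin*) LIBTOOLIZE=glibtoolize ;;
  *)       LIBTOOLIZE=libtoolize  ;;
esac
echo "using $LIBTOOLIZE"
```

Storing the name in a variable (rather than branching at each call site, as the patch does) is a common variant when the script invokes the tool more than once.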