installing numpy headers for cython

Summary: Pull Request resolved: https://github.com/fairinternal/fairseq-py/pull/848

Differential Revision: D17060283

fbshipit-source-id: c7e61cae76a0566cc3e2ddc3ab4d48f8dec9d777
This commit is contained in:
Naman Goyal 2019-08-27 07:10:35 -07:00 committed by Facebook Github Bot
parent 3ab8e0fd73
commit 396ff7f59f
3 changed files with 57 additions and 12 deletions

View File

@ -15,8 +15,6 @@ import numpy as np
import sys
import types
from fairseq.data.data_utils_fast import batch_by_size_fast
def infer_language_pair(path):
"""Infer language pair from filename: <split>.<lang1>-<lang2>.(...).idx"""
@ -200,6 +198,12 @@ def batch_by_size(
required_batch_size_multiple (int, optional): require batch size to
be a multiple of N (default: 1).
"""
try:
from fairseq.data.data_utils_fast import batch_by_size_fast
except ImportError:
raise ImportError(
'Please build Cython components with: `pip install --editable .`'
)
max_tokens = max_tokens if max_tokens is not None else sys.maxsize
max_sentences = max_sentences if max_sentences is not None else sys.maxsize
bsz_mult = required_batch_size_multiple

View File

@ -6,11 +6,6 @@
import numpy as np
import torch
from fairseq.data.token_block_utils_fast import (
_get_slice_indices_fast,
_get_block_to_dataset_index_fast,
)
from fairseq.data import FairseqDataset, plasma_utils
@ -47,6 +42,16 @@ class TokenBlockDataset(FairseqDataset):
include_targets=False,
document_sep_len=1,
):
try:
from fairseq.data.token_block_utils_fast import (
_get_slice_indices_fast,
_get_block_to_dataset_index_fast,
)
except ImportError:
raise ImportError(
'Please build Cython components with: `pip install --editable .`'
)
super().__init__()
self.dataset = dataset
self.pad = pad

View File

@ -15,9 +15,12 @@ with open('README.md') as f:
readme = f.read()
if sys.platform == 'darwin':
extra_compile_args = ['-stdlib=libc++']
extra_compile_args = ['-stdlib=libc++', '-O3']
extra_link_args = ['-stdlib=libc++']
else:
extra_compile_args = ['-std=c++11']
extra_compile_args = ['-std=c++11', '-O3']
extra_link_args = ['-std=c++11']
bleu = Extension(
'fairseq.libbleu',
sources=[
@ -27,8 +30,39 @@ bleu = Extension(
extra_compile_args=extra_compile_args,
)
token_block_utils = [Extension("fairseq.data.token_block_utils_fast", ["fairseq/data/token_block_utils_fast.pyx"])]
data_utils_fast = [Extension("fairseq.data.data_utils_fast", ["fairseq/data/data_utils_fast.pyx"], language="c++")]
def get_cython_modules():
    """Build the list of Cython extension modules for ``setup()``.

    Returns:
        list: two ``Extension`` objects, in this order:
        ``fairseq.data.token_block_utils_fast`` and
        ``fairseq.data.data_utils_fast`` (the latter compiled as C++).
    """
    # Both extensions receive the same compiler/linker flags, which are
    # selected per platform at module level.
    shared_flags = {
        "extra_compile_args": extra_compile_args,
        "extra_link_args": extra_link_args,
    }
    return [
        Extension(
            "fairseq.data.token_block_utils_fast",
            ["fairseq/data/token_block_utils_fast.pyx"],
            **shared_flags
        ),
        Extension(
            "fairseq.data.data_utils_fast",
            ["fairseq/data/data_utils_fast.pyx"],
            language="c++",
            **shared_flags
        ),
    ]
def my_build_ext(pars):
    """Create a ``build_ext`` command that adds the NumPy headers lazily.

    NumPy is imported only inside ``finalize_options`` rather than at the top
    of the file, so the file can be loaded before NumPy is available (it is
    listed in ``setup_requires``).
    More details: https://stackoverflow.com/questions/54117786/add-numpy-get-include-argument-to-setuptools-without-preinstalled-numpy

    Args:
        pars: passed unchanged to the ``build_ext`` constructor.

    Returns:
        An instance of a ``build_ext`` subclass whose ``finalize_options``
        appends ``numpy.get_include()`` to ``include_dirs``.
    """
    from setuptools.command.build_ext import build_ext as _build_ext

    class build_ext(_build_ext):
        def finalize_options(self):
            _build_ext.finalize_options(self)
            # Set the flag through the real ``builtins`` module. The
            # ``__builtins__`` global is a CPython implementation detail: it
            # is a module only in ``__main__`` and a plain dict elsewhere
            # (e.g. when this file is exec'd or imported), where attribute
            # assignment raises AttributeError.
            import builtins
            builtins.__NUMPY_SETUP__ = False
            # Imported here, not at module level, so NumPy is only required
            # once extensions are actually being built.
            import numpy
            self.include_dirs.append(numpy.get_include())

    return build_ext(pars)
setup(
name='fairseq',
@ -45,6 +79,7 @@ setup(
long_description=readme,
long_description_content_type='text/markdown',
setup_requires=[
'numpy',
'cython',
'setuptools>=18.0',
],
@ -58,7 +93,7 @@ setup(
'tqdm',
],
packages=find_packages(exclude=['scripts', 'tests']),
ext_modules=token_block_utils + data_utils_fast + [bleu],
ext_modules=get_cython_modules() + [bleu],
test_suite='tests',
entry_points={
'console_scripts': [
@ -71,5 +106,6 @@ setup(
'fairseq-validate = fairseq_cli.validate:cli_main',
],
},
cmdclass={'build_ext': my_build_ext},
zip_safe=False,
)