atlas: fix generic options

Build atlas with the generic options recommended by the upstream
documentation for distributions. The expression now takes the parameter
'threads' which configures the number of threads atlas will use. The
default is to build serial atlas ('threads = "0"'). The expression also
takes the parameter 'cacheEdge' which is the L2 cache per core, in
bytes. This reduces build time because the cache size doesn't need to be
detected. It also reduces impurity, since different build nodes on Hydra
may have different hardware. It is set to 256 KiB (262144 bytes) by default,
which is recommended for distributions by the upstream documentation.
This commit is contained in:
Thomas Tuegel 2014-12-15 11:16:46 -06:00
parent 8a5d7e7944
commit 91657e30ca

View File

@ -1,5 +1,7 @@
{ stdenv, fetchurl, gfortran, tolerateCpuTimingInaccuracy ? true, shared ? false { stdenv, fetchurl, gfortran, tolerateCpuTimingInaccuracy ? true, shared ? false
, cpuConfig ? if stdenv.isi686 then "-b 32 -A 18 -V 1" else "-b 64 -A 31 -V 384" , cpuConfig ? if stdenv.isi686 then "-b 32 -A 12 -V 1" else "-b 64 -A 14 -V 384"
, cacheEdge ? "262144"
, threads ? "0"
}: }:
# Atlas detects the CPU and optimizes its build accordingly. This is great when # Atlas detects the CPU and optimizes its build accordingly. This is great when
@ -9,27 +11,40 @@
# cannot execute. # cannot execute.
# #
# To avoid these issues, the build is configured using the 'cpuConfig' # To avoid these issues, the build is configured using the 'cpuConfig'
# parameter as follows: # parameter. Upstream recommends these defaults for distributions:
# #
# | x86 CPU | x86_64 CPU | # | x86 CPU | x86_64 CPU |
# |---------------------------------------------+------------------------| # |---------------------------------------------+------------------------|
# | -b 32 | -b 64 | # | -b 32 | -b 64 |
# | -A 18 (Pentium II) | -A 31 (Athlon K7) | # | -A 12 (x86x87) | -A 14 (x86SSE2) |
# | -V 1 (No SIMD: Pentium II doesn't have SSE) | -V 384 (SSE1 and SSE2) | # | -V 1 (No SIMD) | -V 384 (SSE1 and SSE2) |
# #
# Users who want to compile a highly optimized version of ATLAS that's suitable # These defaults should give consistent performance across machines.
# for their local machine can override these settings accordingly. # Performance will be substantially lower than an optimized build, but a build
# optimized for one machine will give even worse performance on others. If you
# are a serious user of Atlas (e.g., you write code that uses it) you should
# compile an optimized version for each of your machines.
#
# The parameter 'cacheEdge' sets the L2 cache per core (in bytes). Setting this
# parameter reduces build time because some tests to detect the L2 cache size
# will not be run. It will also reduce impurity; different build nodes on Hydra
# may have different L2 cache sizes, but fixing the L2 cache size should
# account for that. This also makes the performance of binary substitutes more
# consistent.
# #
# The -V flags can change with each release as new instruction sets are added # The -V flags can change with each release as new instruction sets are added
# because upstream thinks it's a good idea to add entries at the start of an # because upstream thinks it's a good idea to add entries at the start of an
# enum, rather than the end. If the build suddenly fails with messages about # enum, rather than the end. If the build suddenly fails with messages about
# missing instruction sets, you may need to poke around in the source a bit. # missing instruction sets, you may need to poke around in the source a bit.
#
# Upstream recommends the x86x87/x86SSE2 architectures for generic x86/x86_64
# for distribution builds. Additionally, we set 'cacheEdge' to reduce impurity.
# Otherwise, the cache parameters will be detected by timing which will be
# highly variable on Hydra.
let let
inherit (stdenv.lib) optional optionalString;
version = "3.10.2"; version = "3.10.2";
optionalString = stdenv.lib.optionalString;
optional = stdenv.lib.optional;
in in
stdenv.mkDerivation { stdenv.mkDerivation {
@ -50,27 +65,50 @@ stdenv.mkDerivation {
patches = optional tolerateCpuTimingInaccuracy ./disable-timing-accuracy-check.patch; patches = optional tolerateCpuTimingInaccuracy ./disable-timing-accuracy-check.patch;
# Configure outside of the source directory. # Configure outside of the source directory.
preConfigure = '' mkdir build; cd build; configureScript=../configure; ''; preConfigure = ''
mkdir build
cd build
configureScript=../configure
'';
# * -fPIC is passed even in non-shared builds so that the ATLAS code can be # * -fPIC is passed even in non-shared builds so that the ATLAS code can be
# used inside of shared libraries, like Octave does. # used inside of shared libraries, like Octave does.
# #
# * -t 0 disables use of multi-threading. It's not quite clear what the # * -t 0 disables use of multi-threading. It's not quite clear what the
# consequences of that setting are and whether it's necessary or not. # consequences of that setting are and whether it's necessary or not.
configureFlags = "-Fa alg -fPIC -t 0 ${cpuConfig}" + optionalString shared " --shared"; configureFlags = [
"-Fa alg"
"-fPIC"
"-t ${threads}"
cpuConfig
] ++ optional shared "--shared";
postConfigure = ''
if [[ -n "${cacheEdge}" ]]; then
echo '#define CacheEdge ${cacheEdge}' >> include/atlas_cacheedge.h
echo '#define CacheEdge ${cacheEdge}' >> include/atlas_tcacheedge.h
fi
'';
doCheck = true; doCheck = true;
postInstall = ''
# Avoid name collision with the real lapack (ATLAS only builds a partial
# lapack).
mv $out/lib/liblapack.a $out/lib/liblapack_atlas.a
'';
meta = { meta = {
homepage = "http://math-atlas.sourceforge.net/"; homepage = "http://math-atlas.sourceforge.net/";
description = "Automatically Tuned Linear Algebra Software (ATLAS)"; description = "Automatically Tuned Linear Algebra Software (ATLAS)";
license = stdenv.lib.licenses.bsd3; license = stdenv.lib.licenses.bsd3;
longDescription = '' longDescription = ''
The ATLAS (Automatically Tuned Linear Algebra Software) project is an ongoing The ATLAS (Automatically Tuned Linear Algebra Software) project is an
research effort focusing on applying empirical techniques in order to provide ongoing research effort focusing on applying empirical techniques in
portable performance. At present, it provides C and Fortran77 interfaces to a order to provide portable performance. At present, it provides C and
portably efficient BLAS implementation, as well as a few routines from LAPACK. Fortran77 interfaces to a portably efficient BLAS implementation, as well
as a few routines from LAPACK.
''; '';
maintainers = with stdenv.lib.maintainers; [ ttuegel ]; maintainers = with stdenv.lib.maintainers; [ ttuegel ];