basepack: workaround Python's mmap fd limit

Summary:
This is a resend of https://phab.mercurial-scm.org/D1430, without breaking
Windows.

I encountered "too many opened files" problem due to treemanifest packs on my
laptop. This patch seems to be the easiest solution without side effects. Other
choices are deleting files (seem like an non-ideal workaround), forcing a
repack (could be slow), and rewriting using Rust (could take too long).

The root cause is Python's `mmap` implementation has to keep a fd internally
to support `mmapobj.resize` API. We only need read-only operation on the
mmap object so the fd is unnecessary. Re-implement a minimal mmap interface
for this purpose.

Reviewed By: DurhamG

Differential Revision: D6835890

fbshipit-source-id: 74c429e957cb8677682604eb02fc38b5b8d13ef7
This commit is contained in:
Jun Wu 2018-01-29 21:19:59 -08:00 committed by Saurabh Singh
parent e8883f6131
commit e2a5493b04
7 changed files with 105 additions and 16 deletions

1
.gitignore vendored
View File

@ -62,6 +62,7 @@ hgext/clindex.c
hgext/extlib/linelog.c
hgext/patchrmdir.c
hgext/traceprof.cpp
hgext/extlib/litemmap.c
# Packaging sources
SOURCES/

View File

@ -197,6 +197,7 @@ def populateextmods(localmods):
'hgext.extlib.cstore',
'hgext.extlib.indexes',
'hgext.extlib.linelog',
'hgext.extlib.litemmap',
'hgext.extlib.treedirstate',
'hgext.patchrmdir',
'hgext.traceprof',

79
hgext/extlib/litemmap.pyx Normal file
View File

@ -0,0 +1,79 @@
# litemmap.pyx - read-only mmap implementation that does not keep a fd open
#
# Copyright 2018 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
import mmap as pymmap
IF UNAME_SYSNAME == "Windows":
# Not supporting windows for now - fallback to Python's mmap
mmap = pymmap.mmap
ELSE:
from cpython.bytes cimport PyBytes_FromStringAndSize
from libc.errno cimport errno
from posix cimport mman, stat
import os
cdef extern from "sys/mman.h":
cdef char *MAP_FAILED
cdef _raise_oserror(message=''):
if message:
message += ': '
message += os.strerror(errno)
return OSError(errno, message)
cdef class mmap:
cdef char *ptr
cdef size_t len
def __cinit__(self, int fd, size_t length, int
access=pymmap.ACCESS_READ):
cdef stat.struct_stat st
# Only support read-only case
if access != pymmap.ACCESS_READ:
raise RuntimeError('access %s is unsupported' % access)
# If length is 0, read size from file
if length == 0:
r = stat.fstat(fd, &st)
if r != 0:
_raise_oserror('fstat failed')
length = st.st_size
self.ptr = <char*>mman.mmap(NULL, length, mman.PROT_READ,
mman.MAP_SHARED, fd, 0)
if self.ptr == MAP_FAILED:
_raise_oserror('mmap failed')
self.len = length
cpdef close(self):
if self.ptr == NULL:
return
r = mman.munmap(self.ptr, self.len)
if r != 0:
_raise_oserror('munmap failed')
self.ptr = NULL
def __getslice__(self, Py_ssize_t i, Py_ssize_t j):
if self.ptr == NULL:
raise RuntimeError('mmap closed')
if i < 0:
i = 0
if j > self.len:
j = self.len
if i > j:
i = j
return PyBytes_FromStringAndSize(self.ptr + i, j - i)
def __len__(self):
return self.len
def __dealloc__(self):
self.close()

View File

@ -1,12 +1,21 @@
from __future__ import absolute_import
import errno, hashlib, mmap, os, struct, time
import collections
import errno
import hashlib
import os
import struct
import time
from collections import defaultdict
from mercurial import policy, pycompat, util
from mercurial.i18n import _
from mercurial import vfs as vfsmod
from mercurial import (
policy,
pycompat,
util,
vfs as vfsmod,
)
from ..extlib import litemmap
from . import shallowutil
osutil = policy.importmod(r'osutil')
@ -137,8 +146,8 @@ class basepackstore(object):
packsuffixlen = len(self.PACKSUFFIX)
ids = set()
sizes = defaultdict(lambda: 0)
mtimes = defaultdict(lambda: [])
sizes = collections.defaultdict(lambda: 0)
mtimes = collections.defaultdict(lambda: [])
try:
for filename, type, stat in osutil.listdir(self.path, stat=True):
id = None
@ -310,8 +319,9 @@ class basepack(versionmixin):
if self.VERSION == 0:
return self.indexsize
else:
nodecount = struct.unpack_from('!Q', self._index,
self.params.indexstart - 8)[0]
offset = self.params.indexstart - 8
nodecount = struct.unpack_from('!Q',
self._index[offset:offset + 8])[0]
return self.params.indexstart + nodecount * self.INDEXENTRYLENGTH
def freememory(self):
@ -328,10 +338,9 @@ class basepack(versionmixin):
# TODO: use an opener/vfs to access these paths
with open(self.indexpath, PACKOPENMODE) as indexfp:
# memory-map the file, size 0 means whole file
self._index = mmap.mmap(indexfp.fileno(), 0,
access=mmap.ACCESS_READ)
self._index = litemmap.mmap(indexfp.fileno(), 0)
with open(self.packpath, PACKOPENMODE) as datafp:
self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ)
self._data = litemmap.mmap(datafp.fileno(), 0)
self._pagedin = 0
return True

View File

@ -321,7 +321,7 @@ class datapack(basepack.basepack):
if self.VERSION == 1:
# <4 byte len> + <metadata-list>
metalen = struct.unpack_from('!I', data, offset)[0]
metalen = struct.unpack('!I', data[offset:offset + 4])[0]
offset += 4 + metalen
yield (filename, node, deltabase, uncompressedlen)

View File

@ -1058,6 +1058,9 @@ extmodules += cythonize([
Extension('hgext.clindex',
sources=['hgext/clindex.pyx'],
extra_compile_args=filter(None, [STDC99, PRODUCEDEBUGSYMBOLS])),
Extension('hgext.extlib.litemmap',
sources=['hgext/extlib/litemmap.pyx'],
extra_compile_args=filter(None, [STDC99, PRODUCEDEBUGSYMBOLS])),
Extension('hgext.patchrmdir',
sources=['hgext/patchrmdir.pyx'],
extra_compile_args=filter(None, [PRODUCEDEBUGSYMBOLS])),

View File

@ -168,10 +168,6 @@ outputs, which should be fixed later.
hgext/remotefilelog/__init__.py:103: stdlib import "os" follows local import: mercurial
hgext/remotefilelog/__init__.py:104: stdlib import "time" follows local import: mercurial
hgext/remotefilelog/__init__.py:105: stdlib import "traceback" follows local import: mercurial
hgext/remotefilelog/basepack.py:3: multiple imported names: errno, hashlib, mmap, os, struct, time
hgext/remotefilelog/basepack.py:5: relative import of stdlib module
hgext/remotefilelog/basepack.py:5: direct symbol import defaultdict from collections
hgext/remotefilelog/basepack.py:7: symbol import follows non-symbol import: mercurial.i18n
hgext/remotefilelog/basestore.py:3: multiple imported names: errno, hashlib, os, shutil, stat, time
hgext/remotefilelog/basestore.py:15: symbol import follows non-symbol import: mercurial.i18n
hgext/remotefilelog/basestore.py:16: symbol import follows non-symbol import: mercurial.node