mirror of
https://github.com/facebook/sapling.git
synced 2024-10-07 07:17:55 +03:00
basepack: workaround Python's mmap fd limit
Summary: This is a resend of https://phab.mercurial-scm.org/D1430, without breaking Windows. I encountered "too many opened files" problem due to treemanifest packs on my laptop. This patch seems to be the easiest solution without side effects. Other choices are deleting files (seem like an non-ideal workaround), forcing a repack (could be slow), and rewriting using Rust (could take too long). The root cause is Python's `mmap` implementation has to keep a fd internally to support `mmapobj.resize` API. We only need read-only operation on the mmap object so the fd is unnecessary. Re-implement a minimal mmap interface for this purpose. Reviewed By: DurhamG Differential Revision: D6835890 fbshipit-source-id: 74c429e957cb8677682604eb02fc38b5b8d13ef7
This commit is contained in:
parent
e8883f6131
commit
e2a5493b04
1
.gitignore
vendored
1
.gitignore
vendored
@ -62,6 +62,7 @@ hgext/clindex.c
|
||||
hgext/extlib/linelog.c
|
||||
hgext/patchrmdir.c
|
||||
hgext/traceprof.cpp
|
||||
hgext/extlib/litemmap.c
|
||||
|
||||
# Packaging sources
|
||||
SOURCES/
|
||||
|
@ -197,6 +197,7 @@ def populateextmods(localmods):
|
||||
'hgext.extlib.cstore',
|
||||
'hgext.extlib.indexes',
|
||||
'hgext.extlib.linelog',
|
||||
'hgext.extlib.litemmap',
|
||||
'hgext.extlib.treedirstate',
|
||||
'hgext.patchrmdir',
|
||||
'hgext.traceprof',
|
||||
|
79
hgext/extlib/litemmap.pyx
Normal file
79
hgext/extlib/litemmap.pyx
Normal file
@ -0,0 +1,79 @@
|
||||
# litemmap.pyx - read-only mmap implementation that does not keep a fd open
|
||||
#
|
||||
# Copyright 2018 Facebook, Inc.
|
||||
#
|
||||
# This software may be used and distributed according to the terms of the
|
||||
# GNU General Public License version 2 or any later version.
|
||||
|
||||
import mmap as pymmap
|
||||
|
||||
IF UNAME_SYSNAME == "Windows":
|
||||
# Not supporting windows for now - fallback to Python's mmap
|
||||
mmap = pymmap.mmap
|
||||
ELSE:
|
||||
from cpython.bytes cimport PyBytes_FromStringAndSize
|
||||
from libc.errno cimport errno
|
||||
from posix cimport mman, stat
|
||||
|
||||
import os
|
||||
|
||||
cdef extern from "sys/mman.h":
|
||||
cdef char *MAP_FAILED
|
||||
|
||||
cdef _raise_oserror(message=''):
|
||||
if message:
|
||||
message += ': '
|
||||
message += os.strerror(errno)
|
||||
return OSError(errno, message)
|
||||
|
||||
cdef class mmap:
|
||||
cdef char *ptr
|
||||
cdef size_t len
|
||||
|
||||
def __cinit__(self, int fd, size_t length, int
|
||||
access=pymmap.ACCESS_READ):
|
||||
cdef stat.struct_stat st
|
||||
# Only support read-only case
|
||||
if access != pymmap.ACCESS_READ:
|
||||
raise RuntimeError('access %s is unsupported' % access)
|
||||
|
||||
# If length is 0, read size from file
|
||||
if length == 0:
|
||||
r = stat.fstat(fd, &st)
|
||||
if r != 0:
|
||||
_raise_oserror('fstat failed')
|
||||
length = st.st_size
|
||||
|
||||
self.ptr = <char*>mman.mmap(NULL, length, mman.PROT_READ,
|
||||
mman.MAP_SHARED, fd, 0)
|
||||
if self.ptr == MAP_FAILED:
|
||||
_raise_oserror('mmap failed')
|
||||
|
||||
self.len = length
|
||||
|
||||
cpdef close(self):
|
||||
if self.ptr == NULL:
|
||||
return
|
||||
|
||||
r = mman.munmap(self.ptr, self.len)
|
||||
if r != 0:
|
||||
_raise_oserror('munmap failed')
|
||||
self.ptr = NULL
|
||||
|
||||
def __getslice__(self, Py_ssize_t i, Py_ssize_t j):
|
||||
if self.ptr == NULL:
|
||||
raise RuntimeError('mmap closed')
|
||||
if i < 0:
|
||||
i = 0
|
||||
if j > self.len:
|
||||
j = self.len
|
||||
if i > j:
|
||||
i = j
|
||||
return PyBytes_FromStringAndSize(self.ptr + i, j - i)
|
||||
|
||||
def __len__(self):
|
||||
return self.len
|
||||
|
||||
def __dealloc__(self):
|
||||
self.close()
|
||||
|
@ -1,12 +1,21 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import errno, hashlib, mmap, os, struct, time
|
||||
import collections
|
||||
import errno
|
||||
import hashlib
|
||||
import os
|
||||
import struct
|
||||
import time
|
||||
|
||||
from collections import defaultdict
|
||||
from mercurial import policy, pycompat, util
|
||||
from mercurial.i18n import _
|
||||
from mercurial import vfs as vfsmod
|
||||
from mercurial import (
|
||||
policy,
|
||||
pycompat,
|
||||
util,
|
||||
vfs as vfsmod,
|
||||
)
|
||||
|
||||
from ..extlib import litemmap
|
||||
from . import shallowutil
|
||||
|
||||
osutil = policy.importmod(r'osutil')
|
||||
@ -137,8 +146,8 @@ class basepackstore(object):
|
||||
packsuffixlen = len(self.PACKSUFFIX)
|
||||
|
||||
ids = set()
|
||||
sizes = defaultdict(lambda: 0)
|
||||
mtimes = defaultdict(lambda: [])
|
||||
sizes = collections.defaultdict(lambda: 0)
|
||||
mtimes = collections.defaultdict(lambda: [])
|
||||
try:
|
||||
for filename, type, stat in osutil.listdir(self.path, stat=True):
|
||||
id = None
|
||||
@ -310,8 +319,9 @@ class basepack(versionmixin):
|
||||
if self.VERSION == 0:
|
||||
return self.indexsize
|
||||
else:
|
||||
nodecount = struct.unpack_from('!Q', self._index,
|
||||
self.params.indexstart - 8)[0]
|
||||
offset = self.params.indexstart - 8
|
||||
nodecount = struct.unpack_from('!Q',
|
||||
self._index[offset:offset + 8])[0]
|
||||
return self.params.indexstart + nodecount * self.INDEXENTRYLENGTH
|
||||
|
||||
def freememory(self):
|
||||
@ -328,10 +338,9 @@ class basepack(versionmixin):
|
||||
# TODO: use an opener/vfs to access these paths
|
||||
with open(self.indexpath, PACKOPENMODE) as indexfp:
|
||||
# memory-map the file, size 0 means whole file
|
||||
self._index = mmap.mmap(indexfp.fileno(), 0,
|
||||
access=mmap.ACCESS_READ)
|
||||
self._index = litemmap.mmap(indexfp.fileno(), 0)
|
||||
with open(self.packpath, PACKOPENMODE) as datafp:
|
||||
self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ)
|
||||
self._data = litemmap.mmap(datafp.fileno(), 0)
|
||||
|
||||
self._pagedin = 0
|
||||
return True
|
||||
|
@ -321,7 +321,7 @@ class datapack(basepack.basepack):
|
||||
|
||||
if self.VERSION == 1:
|
||||
# <4 byte len> + <metadata-list>
|
||||
metalen = struct.unpack_from('!I', data, offset)[0]
|
||||
metalen = struct.unpack('!I', data[offset:offset + 4])[0]
|
||||
offset += 4 + metalen
|
||||
|
||||
yield (filename, node, deltabase, uncompressedlen)
|
||||
|
3
setup.py
3
setup.py
@ -1058,6 +1058,9 @@ extmodules += cythonize([
|
||||
Extension('hgext.clindex',
|
||||
sources=['hgext/clindex.pyx'],
|
||||
extra_compile_args=filter(None, [STDC99, PRODUCEDEBUGSYMBOLS])),
|
||||
Extension('hgext.extlib.litemmap',
|
||||
sources=['hgext/extlib/litemmap.pyx'],
|
||||
extra_compile_args=filter(None, [STDC99, PRODUCEDEBUGSYMBOLS])),
|
||||
Extension('hgext.patchrmdir',
|
||||
sources=['hgext/patchrmdir.pyx'],
|
||||
extra_compile_args=filter(None, [PRODUCEDEBUGSYMBOLS])),
|
||||
|
@ -168,10 +168,6 @@ outputs, which should be fixed later.
|
||||
hgext/remotefilelog/__init__.py:103: stdlib import "os" follows local import: mercurial
|
||||
hgext/remotefilelog/__init__.py:104: stdlib import "time" follows local import: mercurial
|
||||
hgext/remotefilelog/__init__.py:105: stdlib import "traceback" follows local import: mercurial
|
||||
hgext/remotefilelog/basepack.py:3: multiple imported names: errno, hashlib, mmap, os, struct, time
|
||||
hgext/remotefilelog/basepack.py:5: relative import of stdlib module
|
||||
hgext/remotefilelog/basepack.py:5: direct symbol import defaultdict from collections
|
||||
hgext/remotefilelog/basepack.py:7: symbol import follows non-symbol import: mercurial.i18n
|
||||
hgext/remotefilelog/basestore.py:3: multiple imported names: errno, hashlib, os, shutil, stat, time
|
||||
hgext/remotefilelog/basestore.py:15: symbol import follows non-symbol import: mercurial.i18n
|
||||
hgext/remotefilelog/basestore.py:16: symbol import follows non-symbol import: mercurial.node
|
||||
|
Loading…
Reference in New Issue
Block a user