sapling/eden/scm/edenscm/mercurial/filesystem.py
Durham Goode 6e722132f6 filesystem: sort errors before printing them
Summary:
We see some hgbuild jobs failing because the order of errors is
different from what I see on my devserver. Let's sort them to make them stable.
This is presumably because we're operating in the order returned by readdir,
which is not guaranteed to be sorted.

Reviewed By: xavierd

Differential Revision: D20500566

fbshipit-source-id: bd4d3db1b77cd4bd7259f9bcc10bc65649fae7c6
2020-03-17 18:07:03 -07:00

547 lines
21 KiB
Python

# Portions Copyright (c) Facebook, Inc. and its affiliates.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2.
# Copyright Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import
import errno
import os
import stat
from typing import Callable, Iterable, Optional, Tuple
from bindings import workingcopy
from edenscm.mercurial import match as matchmod, registrar
from . import encoding, error, pathutil, util, vfs as vfsmod
from .i18n import _
from .node import hex
# Mask selecting the low 31 bits. _ischanged() accepts a recorded size/mtime
# as unchanged when it equals either the raw stat value or the stat value
# masked with this (presumably because stored values may have been truncated
# to 31 bits -- confirm against the dirstate format).
_rangemask = 0x7FFFFFFF
# Config registration for this module. "workingcopy.enablerustwalker"
# (off by default) makes _pendingchanges() walk with _rustwalk() instead of
# _walk().
configtable = {}
configitem = registrar.configitem(configtable)
configitem("workingcopy", "enablerustwalker", default=False)
class physicalfilesystem(object):
    """Working-copy operations implemented directly against the on-disk tree.

    Walks the directory rooted at ``root``, compares what it finds with the
    dirstate map, and reports paths that differ from the pristine first
    parent. Also implements purging of untracked files/directories.
    """

    def __init__(self, root, dirstate):
        """Bind the filesystem layer to ``root`` and its ``dirstate``.

        The ui object is taken from ``dirstate._ui``.
        """
        self.root = root
        self.ui = dirstate._ui
        self.opener = vfsmod.vfs(
            root, expandpath=True, realpath=True, cacheaudited=False
        )

        # This is needed temporarily to enable an incremental migration of
        # functionality to this layer.
        self.dirstate = dirstate

        # Remaining budgets for sampled "status" log lines, all seeded from
        # experimental.samplestatus: m = modified, l = lookup, d = deleted,
        # f = found clean after a lookup.
        self.mtolog = self.ui.configint("experimental", "samplestatus") or 0
        self.ltolog = self.mtolog
        self.dtolog = self.mtolog
        self.ftolog = self.mtolog

        # Files whose content was compared and found clean; consumed by
        # _marklookupsclean().
        self.cleanlookups = []

    def _ischanged(self, fn, st, lookups):
        """Classify ``fn`` by comparing stat data with its dirstate entry.

        ``st`` is the stat result for ``fn`` (or None); ``lookups`` is a list
        that receives ``fn`` when stat data alone cannot decide.

        Returns:
          * ``(fn, False)`` when ``st`` is None,
          * ``(fn, True)`` when the entry is added/untracked, or its size or
            exec bit differs from the dirstate record,
          * ``None`` after appending ``fn`` to ``lookups`` when only the
            mtime differs or the mtime equals ``dirstate._lastnormaltime``,
          * ``False`` when the file looks clean.

        Raises error.ProgrammingError for any other dirstate state.
        """
        try:
            t = self.dirstate._map[fn]
        except KeyError:
            # Not in the dirstate: treat as untracked.
            t = ("?", 0, 0, 0)

        if st is None:
            return (fn, False)

        state = t[0]
        if state in "a?":
            return (fn, True)
        elif state in "mnr":
            # 'm' and 'n' states mean the dirstate is tracking the file, so
            # we need to check if it's modified.
            # 'r' means the dirstate thinks the file is removed, but because
            # we just encountered it in the walk we know it's not actually
            # deleted. pendingchanges() purpose is only to report if the
            # file is changed, so we check this file just like if it was
            # 'n', then the upper dirstate/workingcopy layer can decide to
            # report the file as 'r' if needed.

            # This is equivalent to 'state, mode, size, time = dmap[fn]' but not
            # written like that for performance reasons. dmap[fn] is not a
            # Python tuple in compiled builds. The CPython UNPACK_SEQUENCE
            # opcode has fast paths when the value to be unpacked is a tuple or
            # a list, but falls back to creating a full-fledged iterator in
            # general. That is much slower than simply accessing and storing the
            # tuple members one by one.
            mode = t[1]
            size = t[2]
            time = t[3]

            if size >= 0 and (
                (size != st.st_size and size != st.st_size & _rangemask)
                or ((mode ^ st.st_mode) & 0o100 and self.dirstate._checkexec)
            ):
                if self.mtolog > 0:
                    # Consume one unit of the sampling budget, as the "L"
                    # branch below and _processlookups() do; without this
                    # the samplestatus limit never applied to this line.
                    self.mtolog -= 1
                    reasons = []
                    # Negative sizes never reach this branch (size >= 0
                    # above), so only a real size mismatch is reported.
                    if size != st.st_size:
                        reasons.append("size changed (%s -> %s)" % (size, st.st_size))
                        # See T39234759. Sometimes watchman returns 0 size
                        # (st.st_size) and we suspect it's incorrect.
                        # Do a double check with os.stat and log it.
                        if st.st_size == 0:
                            path = self.opener.join(fn)
                            try:
                                reasons.append(
                                    "os.stat size = %s" % os.stat(path).st_size
                                )
                            except Exception as ex:
                                # Diagnostic only: record the failure in the
                                # log line instead of aborting status.
                                reasons.append("os.stat failed (%s)" % ex)
                    if mode != st.st_mode:
                        reasons.append("mode changed (%s -> %s)" % (mode, st.st_mode))
                    self.ui.log("status", "M %s: %s" % (fn, ", ".join(reasons)))

                return (fn, True)
            elif (
                time != st.st_mtime and time != st.st_mtime & _rangemask
            ) or st.st_mtime == self.dirstate._lastnormaltime:
                if self.ltolog:
                    self.ltolog -= 1
                    if st.st_mtime == self.dirstate._lastnormaltime:
                        reason = "mtime untrusted (%s)" % (st.st_mtime)
                    else:
                        reason = "mtime changed (%s -> %s)" % (time, st.st_mtime)
                    self.ui.log("status", "L %s: %s" % (fn, reason))

                # Stat data is inconclusive; the caller resolves this entry
                # by comparing content.
                lookups.append(fn)
                return None
            else:
                if self.dirstate._istreestate:
                    self.dirstate.clearneedcheck(fn)
                return False
        else:
            raise error.ProgrammingError(
                "filesystem.walk should not yield state '%s' for '%s'" % (state, fn)
            )

    def _compareondisk(self, path):
        """Compares the on-disk file content with the clean-checkout content.

        Return True if on-disk is different, False if it is the same, and None
        if the on-disk file is deleted or no longer accessible.
        """
        repo = self.dirstate._repo
        p1 = self.dirstate.parents()[0]
        wctx = repo[None]
        pctx = repo[p1]

        try:
            # This will return True for a file that got replaced by a
            # directory in the interim, but fixing that is pretty hard.
            if (
                path not in pctx
                or wctx.flags(path) != pctx.flags(path)
                or pctx[path].cmp(wctx[path])
            ):
                # Has changed
                return True
            else:
                # Has not changed
                return False
        except (IOError, OSError):
            # A file became inaccessible in between? Mark it as deleted,
            # matching dirstate behavior (issue5584).
            # The dirstate has more complex behavior around whether a
            # missing file matches a directory, etc, but we don't need to
            # bother with that: if f has made it to this point, we're sure
            # it's in the dirstate.
            return None

    def pendingchanges(self, match=None, listignored=False):
        # type: (Optional[Callable[[str], bool]], bool) -> Iterable[Tuple[str, bool]]
        """Yields all the files that differ from the pristine tree.

        Returns an iterator of (string, bool), where the string is the
        repo-rooted file path and the bool is whether the file exists on disk
        or not.

        This is a generator: the dirstate fixup for files found clean only
        runs once the iterator has been fully consumed.
        """
        results = []
        for fn in self._pendingchanges(match, listignored):
            results.append(fn[0])
            yield fn

        oldid = self.dirstate.identity()
        self._postpendingfixup(oldid, results)

    def _pendingchanges(self, match, listignored):
        """Generate the (path, exists) pairs behind pendingchanges().

        Three phases: classify everything the walker yields, then check
        dirstate entries the walker did not yield, then resolve the deferred
        content comparisons collected in ``lookups``.
        """
        dmap = self.dirstate._map
        dmap.preload()

        if match is None:
            # NOTE(review): _walk()/_rustwalk() use matcher attributes
            # (visitdir, bad, files, traversedir, ...). Confirm util.always
            # provides them, or that callers never actually pass None.
            match = util.always

        seen = set()

        walkfn = self._walk
        if self.ui.configbool("workingcopy", "enablerustwalker"):
            walkfn = self._rustwalk

        lookups = []
        for fn, st in walkfn(match, listignored):
            seen.add(fn)
            changed = self._ischanged(fn, st, lookups)
            if changed:
                yield changed

        auditpath = pathutil.pathauditor(self.root, cached=True)

        # Identify files that should exist but were not seen in the walk and
        # report them as changed.
        dget = dmap.__getitem__
        parentmf = None
        for fn in dmap:
            if fn in seen or not match(fn):
                continue
            t = dget(fn)
            state = t[0]
            size = t[2]

            # If it came from the other parent and it doesn't exist in p1,
            # ignore it here. We only want to report changes relative to the
            # pristine p1 tree. For hg status, the higher level dirstate will
            # add in anything that came from p2.
            if size == -2:
                if parentmf is None:
                    # Load the p1 manifest lazily, at most once.
                    repo = self.dirstate._repo
                    p1 = self.dirstate.parents()[0]
                    pctx = repo[p1]
                    parentmf = pctx.manifest()
                if fn not in parentmf:
                    continue

            # We might not've seen a path because it's in a directory that's
            # ignored and the walk didn't go down that path. So let's double
            # check for the existence of that file.
            st = list(util.statfiles([self.opener.join(fn)]))[0]

            # auditpath checks to see if the file is under a symlink directory.
            # If it is, we treat it the same as if it didn't exist.
            if st is None or not auditpath.check(fn):
                # Don't report it as deleted if it wasn't in the original tree,
                # because pendingchanges is only supposed to report differences
                # from the original tree. The higher level dirstate code will
                # handle testing if added files are still there.
                if state in "a":
                    continue
                yield (fn, False)
            else:
                changed = self._ischanged(fn, st, lookups)
                if changed:
                    yield changed

        for changed in self._processlookups(lookups):
            yield changed

    @util.timefunction("fswalk", 0, "ui")
    def _rustwalk(self, match, listignored=False):
        """Walk the working copy with the native ``workingcopy.walker``.

        Yields ``(path, lstat result)`` for each non-directory path the
        walker returns (directories are handed to ``match.traversedir`` when
        one is set). After the walk, walker errors are reported through
        ``match.bad``, except invalid-encoding errors, which only warn.
        """
        join = self.opener.join
        if not listignored:
            # Have the matcher skip ignored files. Technically exact files
            # provided by the user should be returned even if they're ignored.
            # The differencematcher handles this and returns True for exact
            # matches, even if they should be subtracted.
            origmatch = matchmod.differencematcher(match, self.dirstate._ignore)
            normalize = self.dirstate.normalize

            # Adapter that normalizes each path before consulting origmatch.
            # NOTE(review): origmatch is only bound on this branch, so the
            # wrapper is built only when ignored files are being filtered.
            class normalizematcher(object):
                def visitdir(self, path):
                    return origmatch.visitdir(normalize(path))

                def __call__(self, path):
                    return origmatch(normalize(path))

                def bad(self, path, msg):
                    return origmatch.bad(path, msg)

            match = normalizematcher()
            match.traversedir = origmatch.traversedir

        traversedir = bool(match.traversedir)
        walker = workingcopy.walker(join(""), match, traversedir)
        for fn in walker:
            fn = self.dirstate.normalize(fn)
            st = util.lstat(join(fn))
            if traversedir and stat.S_ISDIR(st.st_mode):
                match.traversedir(fn)
            else:
                yield fn, st

        # Sorted for test stability
        for path, walkerror in sorted(walker.errors()):
            # Warn about non-utf8 errors, but don't report them as bad.
            # Ideally we'd inspect the error type, but it's lost coming from
            # Rust. When this moves to Rust it will get easier.
            if walkerror == "invalid file name encoding":
                self.ui.warn(_("skipping invalid utf-8 filename: '%s'\n") % path)
                continue
            match.bad(path, walkerror)

    def _processlookups(self, lookups):
        """Resolve deferred entries by comparing content via _compareondisk().

        Yields ``(fn, False)`` for files that are gone or inaccessible and
        ``(fn, True)`` for files whose content changed. Files found clean are
        not yielded; they are appended to ``self.cleanlookups``.
        """
        repo = self.dirstate._repo
        if util.safehasattr(repo, "fileservice"):
            # Hand the whole batch to the file service before the per-file
            # comparisons below.
            p1 = self.dirstate.parents()[0]
            p1mf = repo[p1].manifest()
            repo.fileservice.prefetch((f, hex(p1mf[f])) for f in lookups if f in p1mf)

        # Sort so we get deterministic ordering. This is important for tests.
        for fn in sorted(lookups):
            changed = self._compareondisk(fn)
            if changed is None:
                # File no longer exists
                if self.dtolog > 0:
                    self.dtolog -= 1
                    self.ui.log("status", "R %s: checked in filesystem" % fn)
                yield (fn, False)
            elif changed is True:
                # File exists and is modified
                if self.mtolog > 0:
                    self.mtolog -= 1
                    self.ui.log("status", "M %s: checked in filesystem" % fn)
                yield (fn, True)
            else:
                # File exists and is clean
                if self.ftolog > 0:
                    self.ftolog -= 1
                    self.ui.log("status", "C %s: checked in filesystem" % fn)
                self.cleanlookups.append(fn)

    @util.timefunction("fswalk", 0, "ui")
    def _walk(self, match, listignored=False):
        """Walk the working copy with ``util.listdir``.

        Yields ``(path, stat)`` for matched regular files and symlinks that
        are either in the dirstate map or not ignored. Unreadable or missing
        directories, and explicitly requested paths of an unsupported file
        type, are reported through ``match.bad``. With ``listignored`` the
        ignore checks are disabled.
        """
        join = self.opener.join
        listdir = util.listdir
        dirkind = stat.S_IFDIR
        regkind = stat.S_IFREG
        lnkkind = stat.S_IFLNK
        badfn = match.bad
        matchfn = match.matchfn
        matchalways = match.always()
        matchtdir = match.traversedir
        dmap = self.dirstate._map

        ignore = self.dirstate._ignore
        dirignore = self.dirstate._dirignore
        if listignored:
            ignore = util.never
            dirignore = util.never

        normalize = self.dirstate.normalize
        normalizefile = None
        if self.dirstate._checkcase:
            normalizefile = self.dirstate._normalizefile

        # Explicitly listed files circumvent the ignored matcher, so let's
        # record which directories we need to handle.
        # TODO: All ignore logic should be encapsulated in the matcher and
        # shouldn't be special cased here.
        explicitfiles = set(match.files())
        explicitdirs = set(util.dirs(explicitfiles))

        # Stack of directories still to list; "" is the repository root.
        work = [""]
        wadd = work.append
        seen = set()
        while work:
            nd = work.pop()
            if not match.visitdir(nd) or nd == ".hg":
                continue
            # Below the root, ".hg" is passed to listdir as its ``skip``
            # argument; at the root it is filtered by the check above.
            skip = None
            if nd != "":
                skip = ".hg"
            try:
                entries = listdir(join(nd), stat=True, skip=skip)
            except OSError as inst:
                if inst.errno in (errno.EACCES, errno.ENOENT):
                    match.bad(nd, encoding.strtolocal(inst.strerror))
                    continue
                raise
            for f, kind, st in entries:
                if not util.isvalidutf8(f):
                    self.ui.warn(_("skipping invalid utf-8 filename: '%s'\n") % f)
                    continue
                path = nd + "/" + f if nd else f
                if normalizefile:
                    # even though f might be a directory, we're only
                    # interested in comparing it to files currently in the
                    # dmap -- therefore normalizefile is enough
                    nf = normalizefile(path, True, True)
                else:
                    nf = path
                if nf not in seen:
                    seen.add(nf)
                    if kind == dirkind:
                        if not dirignore(nf) or nf in explicitdirs:
                            if matchtdir:
                                matchtdir(nf)
                            nf = normalize(nf, True, True)
                            wadd(nf)
                    elif matchalways or matchfn(nf):
                        if kind == regkind or kind == lnkkind:
                            if nf in dmap:
                                yield (nf, st)
                            elif not ignore(nf):
                                # unknown file
                                yield (nf, st)
                        else:
                            # This can happen for unusual file types, like named
                            # pipes. We treat them as if they were missing, so
                            # report them as missing. Covered in test-symlinks.t
                            if nf in explicitfiles:
                                badfn(nf, badtype(kind))

    def purge(self, match, keepfiles, removefiles, removedirs, removeignored, dryrun):
        """Deletes untracked files and directories from the filesystem.

          keepfiles: The list of files that should not be deleted. This is
              generally added files, or modified files from a second parent. It's
              useful for filesystems which don't have direct access to the working
              copy data. (Not consulted by this implementation.)
          removefiles: Whether to delete untracked files.
          removedirs: Whether to delete empty directories.
          removeignored: Whether to delete ignored files and directories.
          dryrun: If true, nothing is deleted; only report what would be.

        Returns a tuple of (files, dirs, errors) indicating files and
        directories that were deleted (or, if a dry-run, should be deleted) and
        any errors that were encountered.
        """
        errors = []
        join = self.dirstate._repo.wjoin

        def remove(remove_func, name):
            # Best effort: record the failure and keep going.
            try:
                remove_func(join(name))
            except OSError:
                errors.append(_("%s cannot be removed") % name)

        files, dirs = findthingstopurge(
            self.dirstate, match, removefiles, removedirs, removeignored
        )

        files = list(files)
        if not dryrun:
            for f in files:
                remove(util.unlink, f)

        # Only evaluate dirs after deleting files, since the lazy evaluation
        # will be checking to see if the directory is empty.
        if not dryrun:
            resultdirs = []
            for f in dirs:
                resultdirs.append(f)
                remove(os.rmdir, f)
        else:
            resultdirs = list(dirs)

        return files, resultdirs, errors

    def _postpendingfixup(self, oldid, changed):
        """update dirstate for files that are actually clean

        ``oldid`` is the dirstate identity captured by the caller; the fixup
        is skipped if the repo's dirstate identity differs once the lock is
        held. ``changed`` is accepted but not used here.
        """
        if self.cleanlookups or self.dirstate._dirty:
            try:
                repo = self.dirstate._repo

                # Updating the dirstate is optional so we don't wait on the
                # lock.
                with repo.disableeventreporting(), repo.wlock(False):
                    # The dirstate may have been reloaded after the wlock
                    # was taken, so load it again.
                    newdirstate = repo.dirstate
                    if newdirstate.identity() == oldid:
                        self._marklookupsclean()

                        # write changes out explicitly, because nesting
                        # wlock at runtime may prevent 'wlock.release()'
                        # after this block from doing so for subsequent
                        # changing files
                        #
                        # This is a no-op if dirstate is not dirty.
                        tr = repo.currenttransaction()
                        newdirstate.write(tr)
                    else:
                        # in this case, writing changes out breaks
                        # consistency, because .hg/dirstate was
                        # already changed simultaneously after last
                        # caching (see also issue5584 for detail)
                        repo.ui.debug("skip marking lookups clean: identity mismatch\n")
            except error.LockError:
                # Deliberate best effort: if the lock cannot be taken right
                # now, skip the optional fixup.
                pass

    def _marklookupsclean(self):
        """Mark the files collected in ``cleanlookups`` as normal.

        The pending list is reset first; each file is re-verified before
        ``dirstate.normal`` is called on it.
        """
        dirstate = self.dirstate
        normal = dirstate.normal
        newdmap = dirstate._map
        cleanlookups = self.cleanlookups
        self.cleanlookups = []

        for f in cleanlookups:
            # Only make something clean if it's already in a
            # normal state. Things in other states, like 'm'
            # merge state, should not be marked clean.
            entry = newdmap[f]
            if entry[0] == "n" and f not in newdmap.copymap and entry[2] != -2:
                # It may have been a while since we added the
                # file to cleanlookups, so double check that
                # it's still clean.
                if self._compareondisk(f) is False:
                    normal(f)
def findthingstopurge(dirstate, match, findfiles, finddirs, includeignored):
    """Find files and/or directories that should be purged.

    Returns a pair (files, dirs), where files is an iterable of files to
    remove, and dirs is an iterable of directories to remove.

    ``files`` is a sorted list of the unknown and ignored paths reported by
    ``dirstate.status`` (empty unless ``findfiles``). ``dirs`` is a lazy
    generator (empty list unless ``finddirs``) that tests each candidate
    directory for emptiness only when it is iterated.

    Side effect: when ``finddirs`` is set, ``match.traversedir`` is replaced
    so that directories seen during the status walk are collected.
    """
    wvfs = dirstate._repo.wvfs
    if finddirs:
        # Start with explicitly named directories; the status walk below adds
        # every directory it traverses through the traversedir hook.
        directories = set(f for f in match.files() if wvfs.isdir(f))
        match.traversedir = directories.add

    # Run status even when files are not wanted: the walk is what fills
    # ``directories``.
    status = dirstate.status(match, includeignored, False, True)

    if findfiles:
        files = sorted(status.unknown + status.ignored)
    else:
        files = []

    if finddirs:
        # Use a generator expression to lazily test for directory contents,
        # otherwise nested directories that are being removed would be counted
        # when in reality they'd be removed already by the time the parent
        # directory is to be removed.
        dirs = (
            f
            for f in sorted(directories, reverse=True)
            if (match(f) and not os.listdir(wvfs.join(f)))
        )
    else:
        dirs = []

    return files, dirs
def badtype(mode):
    """Return a translated "unsupported file type" message for ``mode``.

    ``mode`` is classified with the ``stat.S_IS*`` predicates; anything that
    is not a character device, block device, fifo, socket or directory is
    described as "unknown".
    """
    kind = _("unknown")
    if stat.S_ISCHR(mode):
        kind = _("character device")
    elif stat.S_ISBLK(mode):
        kind = _("block device")
    elif stat.S_ISFIFO(mode):
        kind = _("fifo")
    elif stat.S_ISSOCK(mode):
        kind = _("socket")
    elif stat.S_ISDIR(mode):
        kind = _("directory")
    return _("unsupported file type (type is %s)") % kind