1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347
|
# git2hg.py - convert Git repositories and commits to Mercurial ones
import collections
import io
import itertools
from dulwich import config as dul_config
from dulwich.objects import Commit, Tag
from dulwich.refs import (
ANNOTATED_TAG_SUFFIX,
LOCAL_BRANCH_PREFIX,
LOCAL_TAG_PREFIX,
)
from mercurial.i18n import _
from mercurial.node import bin, short
from mercurial import error, util as hgutil
from mercurial import phases
from . import config
def get_public(ui, refs, remote_names):
    """Determine which incoming Git refs point at commits to publish.

    ui: the Mercurial ui object; ``ui.paths`` is consulted for the
        per-path publishing configuration (``hggit_publish``) and
        ``ui.note`` reports every ref that gets published.
    refs: a map of Git ref names to SHAs.
    remote_names: the names the remote is known by; a configured ref
        written as ``<remote name>/<ref>`` is accepted as ``<ref>`` too.

    Returns a set of the SHAs that should be public. The set is empty
    when phases are not in use.

    Raises error.Abort when the paths found for the remote names carry
    different publishing configurations.
    """
    cfg = config.get_publishing_option(ui, remote_names)

    paths = list(
        itertools.chain.from_iterable(
            ui.paths.get(name) for name in remote_names
        )
    )

    # we may have multiple paths listed, so parse their configuration
    # and deduplicate it
    configs = {path.hggit_publish for path in paths}

    # and if we find more than one, we don't know which is correct
    # (but if we actually had the original path object somehow, we
    # wouldn't have to do this)
    if len(configs) > 1:
        raise error.Abort(
            b'different publishing configurations for the same remote '
            b'location',
            hint=(b'conflicting paths: ' + b", ".join(sorted(remote_names))),
        )

    # a path-specific configuration overrides the general one
    if configs and configs != {None}:
        cfg = configs.pop()

    use_phases, publish_defaults, refs_to_publish = cfg

    if not use_phases:
        # always hand back a set, so callers get one type on every path
        return set()

    # work on a private copy: the configured collection may be shared
    # (e.g. stored on the path object) and must not grow between calls
    refs_to_publish = set(refs_to_publish)

    for remote_name in remote_names:
        prefix = remote_name + b'/'
        refs_to_publish |= {
            ref[len(prefix) :]
            for ref in refs_to_publish
            if ref.startswith(prefix)
        }

    to_publish = set()

    for ref_name, sha in refs.items():
        if ref_name.startswith(LOCAL_BRANCH_PREFIX):
            branch = ref_name[len(LOCAL_BRANCH_PREFIX) :]
            if branch in refs_to_publish:
                ui.note(b"publishing branch %s\n" % branch)
                to_publish.add(sha)

        elif ref_name.startswith(LOCAL_TAG_PREFIX):
            tag = ref_name[len(LOCAL_TAG_PREFIX) :]
            # tags are published by default, or when listed explicitly
            if publish_defaults or tag in refs_to_publish:
                ui.note(b"publishing tag %s\n" % tag)
                to_publish.add(sha)

        elif publish_defaults and ref_name == b'HEAD':
            ui.note(b"publishing remote HEAD\n")
            to_publish.add(sha)

    return to_publish
def find_incoming(ui, git_object_store, git_map, refs, remote):
    '''determine which Git commits still have to be imported

    ui: handed on to get_public().

    git_object_store: a dulwich object store, indexed by SHA.

    git_map: a map whose keys are the Git commits that have been
    imported already.

    refs: a map of the refs we're interested in to their SHAs.

    remote: handed on to get_public() as the remote names.

    Returns a list of GitIncomingCommit, parents before children, each
    carrying either the public or the draft phase.
    '''
    public = get_public(ui, refs, remote)
    done = set()

    def is_known(sha):
        # either visited during this run, or imported earlier
        return sha in done or sha in git_map

    # sort key: the commit date
    def commit_date(sha):
        commit = git_object_store[sha]
        return commit.commit_time - commit.commit_timezone

    def head_commits():
        '''list the distinct commits the refs point at, newest first'''
        heads = []
        seen = set()

        for ref, sha in refs.items():
            # refs could contain refs on the server that we haven't pulled
            # down the objects for; also make sure it's a sha and not a
            # symref
            if ref == b'HEAD' or sha not in git_object_store:
                continue

            obj = git_object_store[sha]

            # follow (possibly nested) tags down to what they point at
            while isinstance(obj, Tag):
                _obj_type, sha = obj.object
                obj = git_object_store[sha]

            if not isinstance(obj, Commit) or sha in seen:
                continue

            seen.add(sha)
            heads.append(sha)

        heads.sort(key=commit_date, reverse=True)
        return heads

    def collect_unseen(stack):
        '''get all unseen commits reachable from stack in topological order

        'unseen' means not reachable from the done set and not in the
        git map. Mutates stack, the done set and the public set in the
        process.
        '''
        ordered = []

        while stack:
            sha = stack[-1]

            if is_known(sha):
                stack.pop()
                continue

            assert isinstance(sha, bytes)
            commit = git_object_store[sha]
            assert isinstance(commit, Commit)

            for parent in commit.parents:
                if sha in public:
                    public.add(parent)

                if not is_known(parent):
                    # process parents of a commit before processing the
                    # commit itself, and come back to this commit later
                    stack.append(parent)
                    break
            else:
                # every parent is known, so the commit itself can go out
                ordered.append(sha)
                done.add(sha)
                stack.pop()

        return ordered

    commits = collect_unseen(head_commits())

    # walk children before parents, so that publicness reaches all
    # ancestors of a public commit
    for sha in reversed(commits):
        parents = git_object_store[sha].parents
        if sha in public:
            public.update(parents)

    incoming = []
    for sha in commits:
        phase = phases.public if sha in public else phases.draft
        incoming.append(GitIncomingCommit(sha, phase))
    return incoming
class GitIncomingCommit:
    '''plain record for a single entry of the find_incoming result'''

    # sha: the Git SHA as passed to the constructor
    # phase: the phase as passed to the constructor
    __slots__ = ('sha', 'phase')

    def __init__(self, sha, phase):
        self.sha, self.phase = sha, phase

    @property
    def node(self):
        '''the SHA run through bin()'''
        return bin(self.sha)

    @property
    def short(self):
        '''the node run through short()'''
        return short(self.node)

    def __bytes__(self):
        '''the SHA, exactly as stored'''
        return self.sha
def extract_hg_metadata(message, git_extra):
    """Separate hg-git metadata from a Git commit message and extra fields.

    message: the raw commit message, as bytes; anything following a
        ``--HG--`` line is parsed as metadata and cut off the message.
    git_extra: an iterable of (field, data) pairs of extra Git commit
        fields.

    Returns a tuple (message, renames, branch, extra). renames is None
    unless hg-git metadata was found, in which case it is a dict mapping
    each new file name to the name it was renamed from.
    """
    # Mercurial records renames explicitly, whereas Git infers them. We'd
    # like to optionally infer renames for commits that originated in Git,
    # but never for commits that originated in Mercurial. The two are told
    # apart by the presence of hg-git fields in the Git commit:
    #
    # - hg-git versions past 0.7.0 always store at least one hg-git field
    #   in the commits they export.
    #
    # - For hg-git versions before 0.7.0, this is merely a heuristic: a
    #   commit with any extra hg fields definitely originated in
    #   Mercurial, but without them we cannot really be sure.
    #
    # So renames is a dict if we think the commit originated in
    # Mercurial, and None if we don't; callers can use that to decide
    # whether to infer rename information.
    renames = None
    extra = {}
    branch = None

    body, marker, trailer = message.partition(b"\n--HG--\n")

    if marker:
        message = body
        renames = {}

        for line in trailer.split(b"\n"):
            if not line:
                continue

            command, separator, data = line.partition(b" : ")
            if not separator:
                # the first line that isn't metadata ends the parsing
                break

            if command == b'rename':
                before, after = data.split(b" => ", 1)
                renames[after] = before
            elif command == b'branch':
                branch = data
            elif command == b'extra':
                k, v = data.split(b" : ", 1)
                extra[k] = hgutil.urlreq.unquote(v)

    git_fn = 0

    for field, data in git_extra:
        if not field.startswith(b'HG:'):
            # preserve ordering in Git by using an incrementing integer
            # for each field. Note that extra metadata in Git is an
            # ordered list of pairs.
            hg_field = b'GIT%d-%s' % (git_fn, field)
            git_fn += 1
            quoted_data = hgutil.urlreq.quote(data)
            extra[hgutil.urlreq.quote(hg_field)] = quoted_data
            continue

        # any HG: field marks the commit as originating in Mercurial
        if renames is None:
            renames = {}

        command = field[3:]
        if command == b'rename':
            before, after = data.split(b':', 1)
            source = hgutil.urlreq.unquote(before)
            renames[hgutil.urlreq.unquote(after)] = source
        elif command == b'extra':
            k, v = data.split(b':', 1)
            value = hgutil.urlreq.unquote(v)
            extra[hgutil.urlreq.unquote(k)] = value

    return (message, renames, branch, extra)
def convert_git_int_mode(mode):
    """Translate an integer Git file mode into flags.

    Returns b'x' for 0o100755, b'l' for 0o120000, and b'' for 0o100644
    as well as for any other mode.
    """
    # TODO: make these into constants
    flags_by_mode = {0o100644: b'', 0o100755: b'x', 0o120000: b'l'}
    return flags_by_mode.get(mode, b'')
def set_committer_from_author(commit):
    """Overwrite the committer fields of commit with its author fields.

    Copies author, author_time and author_timezone onto committer,
    commit_time and commit_timezone, in that order. The commit is
    modified in place and nothing is returned.
    """
    field_pairs = (
        ('committer', 'author'),
        ('commit_time', 'author_time'),
        ('commit_timezone', 'author_timezone'),
    )
    for target, source in field_pairs:
        setattr(commit, target, getattr(commit, source))
def filter_refs(refs, heads):
    '''Select refs from a dictionary of refs: shas.

    If heads is not None, return the refs that match the given heads,
    in the order the heads were given. A head is compared against each
    ref name with its first two path components removed (so that
    'refs/heads/foo' is matched by 'foo'); a head ending in '/*'
    selects every ref whose stripped name starts with that prefix,
    sorted by full ref name.

    If heads is None, return the refs that are heads or tags, plus
    HEAD, sorted by name; refs ending with the annotated tag suffix are
    left out.

    Returns a collections.OrderedDict of ref: sha.

    Raises error.RepoLookupError if a head that isn't a wildcard
    matches no ref, or more than one.
    '''
    filteredrefs = []

    if heads is not None:
        # contains pairs of ('refs/(heads|tags|...)/foo', 'foo')
        # if ref is just '<foo>', then we get ('foo', 'foo')
        stripped_refs = [
            (r, r[r.find(b'/', r.find(b'/') + 1) + 1 :]) for r in refs
        ]

        for h in heads:
            if h.endswith(b'/*'):
                prefix = h[:-1]  # include the / but not the *
                filteredrefs.extend(
                    sorted(
                        full
                        for full, name in stripped_refs
                        if name.startswith(prefix)
                    )
                )
                continue

            r = [full for full, name in stripped_refs if name == h]

            if not r:
                msg = _(b"unknown revision '%s'") % h
                raise error.RepoLookupError(msg)

            if len(r) > 1:
                msg = _(b"ambiguous reference %s: %s")
                msg %= (
                    h,
                    b', '.join(sorted(r)),
                )
                raise error.RepoLookupError(msg)

            filteredrefs.append(r[0])
    else:
        prefixes = (LOCAL_BRANCH_PREFIX, LOCAL_TAG_PREFIX)
        filteredrefs = sorted(
            ref
            for ref in refs
            if not ref.endswith(ANNOTATED_TAG_SUFFIX)
            and (ref.startswith(prefixes) or ref == b'HEAD')
        )

    # the choice of OrderedDict vs plain dict has no impact on stock
    # hg-git, but allows extensions to customize the order in which refs
    # are returned
    return collections.OrderedDict((r, refs[r]) for r in filteredrefs)
def parse_gitmodules(git, tree_obj):
    """Parse the .gitmodules file found in the Git tree tree_obj

    git: a mapping from SHA to Git object, used to fetch the file.
    tree_obj: the tree in which .gitmodules is looked up.

    Returns whatever dulwich's parse_submodules() produces for the
    parsed file, i.e. (submodule path, url, name) entries, where name is
    the hgutil.urlreq.quoted part of the section's name

    Raises KeyError if no modules exist, or ValueError if they're invalid
    """
    _mode, blob_sha = tree_obj[b'.gitmodules']

    # the file is fully parsed before the in-memory stream gets closed
    with io.BytesIO(git[blob_sha].data) as stream:
        parsed = dul_config.ConfigFile.from_file(stream)

    return dul_config.parse_submodules(parsed)
def git_file_readlines(git, tree_obj, fname):
    """Read the lines of the entry called fname in a Git tree

    git: a mapping from SHA to Git object.
    tree_obj: the tree in which fname is looked up.

    :return: list of lines, which is empty if the tree has no such entry
    """
    if fname not in tree_obj:
        return []

    _mode, blob_sha = tree_obj[fname]
    return git[blob_sha].data.splitlines()
|