'use strict' const url = require('url') const gitHosts = require('./git-host-info.js') const GitHost = module.exports = require('./git-host.js') const LRU = require('lru-cache') const cache = new LRU({ max: 1000 }) const protocolToRepresentationMap = { 'git+ssh:': 'sshurl', 'git+https:': 'https', 'ssh:': 'sshurl', 'git:': 'git' } function protocolToRepresentation (protocol) { return protocolToRepresentationMap[protocol] || protocol.slice(0, -1) } const authProtocols = { 'git:': true, 'https:': true, 'git+https:': true, 'http:': true, 'git+http:': true } const knownProtocols = Object.keys(gitHosts.byShortcut).concat(['http:', 'https:', 'git:', 'git+ssh:', 'git+https:', 'ssh:']) module.exports.fromUrl = function (giturl, opts) { if (typeof giturl !== 'string') { return } const key = giturl + JSON.stringify(opts || {}) if (!cache.has(key)) { cache.set(key, fromUrl(giturl, opts)) } return cache.get(key) } function fromUrl (giturl, opts) { if (!giturl) { return } const url = isGitHubShorthand(giturl) ? 'github:' + giturl : correctProtocol(giturl) const parsed = parseGitUrl(url) if (!parsed) { return parsed } const gitHostShortcut = gitHosts.byShortcut[parsed.protocol] const gitHostDomain = gitHosts.byDomain[parsed.hostname.startsWith('www.') ? parsed.hostname.slice(4) : parsed.hostname] const gitHostName = gitHostShortcut || gitHostDomain if (!gitHostName) { return } const gitHostInfo = gitHosts[gitHostShortcut || gitHostDomain] let auth = null if (authProtocols[parsed.protocol] && (parsed.username || parsed.password)) { auth = `${parsed.username}${parsed.password ? ':' + parsed.password : ''}` } let committish = null let user = null let project = null let defaultRepresentation = null try { if (gitHostShortcut) { let pathname = parsed.pathname.startsWith('/') ? parsed.pathname.slice(1) : parsed.pathname const firstAt = pathname.indexOf('@') // we ignore auth for shortcuts, so just trim it out if (firstAt > -1) { pathname = pathname.slice(firstAt + 1) } const lastSlash = pathname.lastIndexOf('/') if (lastSlash > -1) { user = decodeURIComponent(pathname.slice(0, lastSlash)) // we want nulls only, never empty strings if (!user) { user = null } project = decodeURIComponent(pathname.slice(lastSlash + 1)) } else { project = decodeURIComponent(pathname) } if (project.endsWith('.git')) { project = project.slice(0, -4) } if (parsed.hash) { committish = decodeURIComponent(parsed.hash.slice(1)) } defaultRepresentation = 'shortcut' } else { if (!gitHostInfo.protocols.includes(parsed.protocol)) { return } const segments = gitHostInfo.extract(parsed) if (!segments) { return } user = segments.user && decodeURIComponent(segments.user) project = decodeURIComponent(segments.project) committish = decodeURIComponent(segments.committish) defaultRepresentation = protocolToRepresentation(parsed.protocol) } } catch (err) { /* istanbul ignore else */ if (err instanceof URIError) { return } else { throw err } } return new GitHost(gitHostName, user, auth, project, committish, defaultRepresentation, opts) } // accepts input like git:github.com:user/repo and inserts the // after the first : const correctProtocol = (arg) => { const firstColon = arg.indexOf(':') const proto = arg.slice(0, firstColon + 1) if (knownProtocols.includes(proto)) { return arg } const firstAt = arg.indexOf('@') if (firstAt > -1) { if (firstAt > firstColon) { return `git+ssh://${arg}` } else { return arg } } const doubleSlash = arg.indexOf('//') if (doubleSlash === firstColon + 1) { return arg } return arg.slice(0, firstColon + 1) + '//' + arg.slice(firstColon + 1) } // look for github shorthand inputs, such as npm/cli const isGitHubShorthand = (arg) => { // it cannot contain whitespace before the first # // it cannot start with a / because that's probably an absolute file path // but it must include a slash since repos are username/repository // it cannot start with a . because that's probably a relative file path // it cannot start with an @ because that's a scoped package if it passes the other tests // it cannot contain a : before a # because that tells us that there's a protocol // a second / may not exist before a # const firstHash = arg.indexOf('#') const firstSlash = arg.indexOf('/') const secondSlash = arg.indexOf('/', firstSlash + 1) const firstColon = arg.indexOf(':') const firstSpace = /\s/.exec(arg) const firstAt = arg.indexOf('@') const spaceOnlyAfterHash = !firstSpace || (firstHash > -1 && firstSpace.index > firstHash) const atOnlyAfterHash = firstAt === -1 || (firstHash > -1 && firstAt > firstHash) const colonOnlyAfterHash = firstColon === -1 || (firstHash > -1 && firstColon > firstHash) const secondSlashOnlyAfterHash = secondSlash === -1 || (firstHash > -1 && secondSlash > firstHash) const hasSlash = firstSlash > 0 // if a # is found, what we really want to know is that the character immediately before # is not a / const doesNotEndWithSlash = firstHash > -1 ? arg[firstHash - 1] !== '/' : !arg.endsWith('/') const doesNotStartWithDot = !arg.startsWith('.') return spaceOnlyAfterHash && hasSlash && doesNotEndWithSlash && doesNotStartWithDot && atOnlyAfterHash && colonOnlyAfterHash && secondSlashOnlyAfterHash } // attempt to correct an scp style url so that it will parse with `new URL()` const correctUrl = (giturl) => { const firstAt = giturl.indexOf('@') const lastHash = giturl.lastIndexOf('#') let firstColon = giturl.indexOf(':') let lastColon = giturl.lastIndexOf(':', lastHash > -1 ? lastHash : Infinity) let corrected if (lastColon > firstAt) { // the last : comes after the first @ (or there is no @) // like it would in: // proto://hostname.com:user/repo // username@hostname.com:user/repo // :password@hostname.com:user/repo // username:password@hostname.com:user/repo // proto://username@hostname.com:user/repo // proto://:password@hostname.com:user/repo // proto://username:password@hostname.com:user/repo // then we replace the last : with a / to create a valid path corrected = giturl.slice(0, lastColon) + '/' + giturl.slice(lastColon + 1) // // and we find our new : positions firstColon = corrected.indexOf(':') lastColon = corrected.lastIndexOf(':') } if (firstColon === -1 && giturl.indexOf('//') === -1) { // we have no : at all // as it would be in: // username@hostname.com/user/repo // then we prepend a protocol corrected = `git+ssh://${corrected}` } return corrected } // try to parse the url as its given to us, if that throws // then we try to clean the url and parse that result instead // THIS FUNCTION SHOULD NEVER THROW const parseGitUrl = (giturl) => { let result try { result = new url.URL(giturl) } catch (err) {} if (result) { return result } const correctedUrl = correctUrl(giturl) try { result = new url.URL(correctedUrl) } catch (err) {} return result }