|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221 |
'use strict';

// Pick the URL constructor: the global `URL` when one exists, otherwise the
// one exported by the `url` module.
// TODO: Use the `URL` global when targeting Node.js 10
const URLParser = typeof URL !== 'undefined' ? URL : require('url').URL;

// MIME type and charset treated as the implicit defaults of a data URL.
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
const DATA_URL_DEFAULT_CHARSET = 'us-ascii';
const DATA_URL_DEFAULT_MIME_TYPE = 'text/plain';
-
/**
 * Tell whether `name` is matched by at least one entry of `filters`.
 *
 * @param {string} name - Value to check.
 * @param {Array<RegExp|string>} filters - A `RegExp` entry matches when its
 *   `test(name)` succeeds; any other entry matches on strict equality.
 * @returns {boolean} `true` as soon as one entry matches, otherwise `false`.
 */
const testParameter = (name, filters) => {
	return filters.some(candidate => {
		if (candidate instanceof RegExp) {
			return candidate.test(name);
		}

		return candidate === name;
	});
};
-
/**
 * Normalize a `data:` URL.
 *
 * The result always uses a lowercase `data:` scheme. The MIME type and the
 * value of a `charset` attribute are lowercased, a `charset` equal to
 * `DATA_URL_DEFAULT_CHARSET` is dropped, and the MIME type is omitted when it
 * equals `DATA_URL_DEFAULT_MIME_TYPE` (or is empty) and nothing else remains
 * in the media type. For base64 payloads, whitespace around the body is
 * trimmed.
 *
 * @param {string} urlString - Data URL to normalize (already trimmed by the caller).
 * @param {{stripHash: boolean}} options - When `stripHash` is truthy, the `#fragment` is removed.
 * @returns {string} The normalized data URL.
 * @throws {Error} When `urlString` does not have the `data:<media type>,<body>` shape.
 */
const normalizeDataURL = (urlString, {stripHash}) => {
	// The scheme is matched case-insensitively (`DATA:` is as valid as `data:`),
	// in line with the caller's case-insensitive data URL detection.
	const parts = urlString.match(/^data:([^,]*?),([^#]*?)(?:#(.*))?$/i);

	if (!parts) {
		throw new Error(`Invalid URL: ${urlString}`);
	}

	const mediaType = parts[1].split(';');
	const body = parts[2];
	const hash = stripHash ? '' : parts[3];

	let base64 = false;

	// A trailing `base64` token is a flag, not an attribute
	if (mediaType[mediaType.length - 1] === 'base64') {
		mediaType.pop();
		base64 = true;
	}

	// Lowercase MIME type
	const mimeType = (mediaType.shift() || '').toLowerCase();
	const attributes = mediaType
		.map(attribute => {
			// Split on the first `=` only, so values that themselves contain `=` are kept intact
			const separatorIndex = attribute.indexOf('=');
			const hasValue = separatorIndex !== -1;
			const key = (hasValue ? attribute.slice(0, separatorIndex) : attribute).trim();
			let value = hasValue ? attribute.slice(separatorIndex + 1).trim() : '';

			// Lowercase `charset`
			if (key === 'charset') {
				value = value.toLowerCase();

				// The default charset is implied, so it is removed
				if (value === DATA_URL_DEFAULT_CHARSET) {
					return '';
				}
			}

			return `${key}${value ? `=${value}` : ''}`;
		})
		.filter(Boolean);

	const normalizedMediaType = [
		...attributes
	];

	if (base64) {
		normalizedMediaType.push('base64');
	}

	// The MIME type is only written out when something follows it or when it is not the default
	if (normalizedMediaType.length !== 0 || (mimeType && mimeType !== DATA_URL_DEFAULT_MIME_TYPE)) {
		normalizedMediaType.unshift(mimeType);
	}

	return `data:${normalizedMediaType.join(';')},${base64 ? body.trim() : body}${hash ? `#${hash}` : ''}`;
};
-
/**
 * Normalize a URL string.
 *
 * Data URLs are delegated to `normalizeDataURL`. Any other input is parsed
 * with `URLParser` and normalized according to `options`.
 *
 * @param {string} urlString - URL to normalize; surrounding whitespace is ignored.
 * @param {object} [options]
 * @param {string} [options.defaultProtocol='http:'] - Protocol prepended to protocol-less and protocol-relative URLs.
 * @param {boolean} [options.normalizeProtocol=true] - When `false`, a protocol-relative input (`//host`) stays protocol-relative.
 * @param {boolean} [options.forceHttp=false] - Rewrite `https:` to `http:`.
 * @param {boolean} [options.forceHttps=false] - Rewrite `http:` to `https:`.
 * @param {boolean} [options.stripAuthentication=true] - Remove username and password.
 * @param {boolean} [options.stripHash=false] - Remove the fragment.
 * @param {boolean} [options.stripWWW=true] - Remove a leading `www.` from the hostname.
 * @param {boolean} [options.stripProtocol=false] - Remove `http://` / `https://` / `//` from the result.
 * @param {Array<RegExp|string>} [options.removeQueryParameters=[/^utm_\w+/i]] - Query parameter names to drop.
 * @param {boolean} [options.removeTrailingSlash=true] - Remove a trailing `/` from the path.
 * @param {boolean|Array<RegExp|string>} [options.removeDirectoryIndex=false] - `true` removes a final
 *   path segment matching `/^index\.[a-z]+$/`; an array supplies custom matchers.
 * @param {boolean} [options.sortQueryParameters=true] - Sort query parameters.
 * @returns {string} The normalized URL.
 * @throws {Error} When a renamed legacy option is passed, or when `forceHttp` and `forceHttps` are both set.
 *   Errors raised by `URLParser` for unparsable input propagate unchanged.
 */
const normalizeUrl = (urlString, options) => {
	options = {
		defaultProtocol: 'http:',
		normalizeProtocol: true,
		forceHttp: false,
		forceHttps: false,
		stripAuthentication: true,
		stripHash: false,
		stripWWW: true,
		stripProtocol: false,
		removeQueryParameters: [/^utm_\w+/i],
		removeTrailingSlash: true,
		removeDirectoryIndex: false,
		sortQueryParameters: true,
		...options
	};

	// TODO: Remove this at some point in the future
	if (Reflect.has(options, 'normalizeHttps')) {
		throw new Error('options.normalizeHttps is renamed to options.forceHttp');
	}

	if (Reflect.has(options, 'normalizeHttp')) {
		throw new Error('options.normalizeHttp is renamed to options.forceHttps');
	}

	if (Reflect.has(options, 'stripFragment')) {
		throw new Error('options.stripFragment is renamed to options.stripHash');
	}

	urlString = urlString.trim();

	// Data URL
	if (/^data:/i.test(urlString)) {
		return normalizeDataURL(urlString, options);
	}

	const hasRelativeProtocol = urlString.startsWith('//');
	const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(urlString);

	// Prepend protocol
	if (!isRelativeUrl) {
		urlString = urlString.replace(/^(?!(?:\w+:)?\/\/)|^\/\//, options.defaultProtocol);
	}

	const urlObj = new URLParser(urlString);

	if (options.forceHttp && options.forceHttps) {
		throw new Error('The `forceHttp` and `forceHttps` options cannot be used together');
	}

	if (options.forceHttp && urlObj.protocol === 'https:') {
		urlObj.protocol = 'http:';
	}

	if (options.forceHttps && urlObj.protocol === 'http:') {
		urlObj.protocol = 'https:';
	}

	// Remove auth
	if (options.stripAuthentication) {
		urlObj.username = '';
		urlObj.password = '';
	}

	// Remove hash
	if (options.stripHash) {
		urlObj.hash = '';
	}

	// Remove duplicate slashes if not preceded by a protocol
	if (urlObj.pathname) {
		// TODO: Use the following instead when targeting Node.js 10
		// `urlObj.pathname = urlObj.pathname.replace(/(?<!https?:)\/{2,}/g, '/');`
		urlObj.pathname = urlObj.pathname.replace(/((?!:).|^)\/{2,}/g, (_, p1) => {
			// `p1` is the character consumed in front of the slash run (or empty at
			// the start); it is kept unless it is itself a slash
			if (/^(?!\/)/.test(p1)) {
				return `${p1}/`;
			}

			return '/';
		});
	}

	// Decode URI octets
	if (urlObj.pathname) {
		try {
			urlObj.pathname = decodeURI(urlObj.pathname);
		} catch (error) {
			// A malformed percent sequence (e.g. `%zz`) makes `decodeURI` throw a
			// `URIError`; the pathname is then left exactly as parsed
			if (!(error instanceof URIError)) {
				throw error;
			}
		}
	}

	// Remove directory index
	if (options.removeDirectoryIndex === true) {
		options.removeDirectoryIndex = [/^index\.[a-z]+$/];
	}

	if (Array.isArray(options.removeDirectoryIndex) && options.removeDirectoryIndex.length > 0) {
		let pathComponents = urlObj.pathname.split('/');
		const lastComponent = pathComponents[pathComponents.length - 1];

		if (testParameter(lastComponent, options.removeDirectoryIndex)) {
			pathComponents = pathComponents.slice(0, pathComponents.length - 1);
			urlObj.pathname = pathComponents.slice(1).join('/') + '/';
		}
	}

	if (urlObj.hostname) {
		// Remove trailing dot
		urlObj.hostname = urlObj.hostname.replace(/\.$/, '');

		// Remove `www.`
		if (options.stripWWW && /^www\.([a-z\-\d]{2,63})\.([a-z.]{2,5})$/.test(urlObj.hostname)) {
			// Each label should be max 63 at length (min: 2).
			// The extension should be max 5 at length (min: 2).
			// Source: https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names
			urlObj.hostname = urlObj.hostname.replace(/^www\./, '');
		}
	}

	// Remove query unwanted parameters
	if (Array.isArray(options.removeQueryParameters)) {
		// Iterate over a snapshot of the keys because entries are deleted along the way
		for (const key of [...urlObj.searchParams.keys()]) {
			if (testParameter(key, options.removeQueryParameters)) {
				urlObj.searchParams.delete(key);
			}
		}
	}

	// Sort query parameters
	if (options.sortQueryParameters) {
		urlObj.searchParams.sort();
	}

	if (options.removeTrailingSlash) {
		urlObj.pathname = urlObj.pathname.replace(/\/$/, '');
	}

	// Take advantage of many of the Node `url` normalizations
	urlString = urlObj.toString();

	// Remove ending `/`
	if ((options.removeTrailingSlash || urlObj.pathname === '/') && urlObj.hash === '') {
		urlString = urlString.replace(/\/$/, '');
	}

	// Restore relative protocol, if applicable
	if (hasRelativeProtocol && !options.normalizeProtocol) {
		urlString = urlString.replace(/^http:\/\//, '//');
	}

	// Remove http/https
	if (options.stripProtocol) {
		urlString = urlString.replace(/^(?:https?:)?\/\//, '');
	}

	return urlString;
};
-
// Also expose the function as its own `default` property.
// TODO: Remove this for the next major release
normalizeUrl.default = normalizeUrl;

// The function itself is the module's export.
module.exports = normalizeUrl;
|