osr-mono/packages/core/strings.js
2025-01-29 17:48:22 +01:00

609 lines
22 KiB
JavaScript

/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
'use strict';
Object.defineProperty(exports, "__esModule", { value: true });
exports.repeat = exports.safeBtoa = exports.appendWithLimit = exports.startsWithUTF8BOM = exports.UTF8_BOM_CHARACTER = exports.removeAnsiEscapeCodes = exports.lcut = exports.computeLineStarts = exports.difference = exports.isFullWidthCharacter = exports.isBasicASCII = exports.containsRTL = exports.isLowSurrogate = exports.isHighSurrogate = exports.commonSuffixLength = exports.commonPrefixLength = exports.equalsIgnoreCase = exports.compareIgnoreCase = exports.compare = exports.lastNonWhitespaceIndex = exports.getLeadingWhitespace = exports.firstNonWhitespaceIndex = exports.normalizeNFC = exports.canNormalize = exports.regExpLeadsToEndlessLoop = exports.createRegExp = exports.indexOfIgnoreCase = exports.endsWith = exports.startsWith = exports.stripWildcards = exports.convertSimple2RegExpPattern = exports.rtrim = exports.ltrim = exports.trim = exports.escapeRegExpCharacters = exports.escape = exports.format = exports.pad = exports.isFalsyOrWhitespace = exports.substituteAlt = exports.substitute = exports.empty = void 0;
const map_1 = require("./map");
exports.empty = '';
const constants_1 = require("./constants");
/**
 * Replaces every placeholder matched by `constants_1.REGEX_VAR` in `template`.
 * The first capture group of each match is used as the key into `map`.
 * Falsy values (missing keys, `0`, `false`, `''`) are rendered as an empty string.
 *
 * @param template string containing the placeholders
 * @param map object providing the replacement value for each key
 * @returns the template with every placeholder replaced
 */
function substitute(template, map) {
    return template.replace(constants_1.REGEX_VAR, (_match, key) => {
        const value = map[key] || '';
        return value.toString();
    });
}
exports.substitute = substitute;
/**
 * Replaces every placeholder matched by `constants_1.REGEX_VAR_ALT` in `template`.
 * The first capture group of each match is used as the key into `map`.
 * Falsy values (missing keys, `0`, `false`, `''`) are rendered as an empty string.
 *
 * @param template string containing the placeholders
 * @param map object providing the replacement value for each key
 * @returns the template with every placeholder replaced
 */
function substituteAlt(template, map) {
    return template.replace(constants_1.REGEX_VAR_ALT, (_match, key) => {
        const value = map[key] || '';
        return value.toString();
    });
}
exports.substituteAlt = substituteAlt;
/**
 * Tells whether `str` carries no visible content.
 *
 * @param str value to inspect
 * @returns `true` for falsy values, non-strings and strings that trim to nothing
 */
function isFalsyOrWhitespace(str) {
    const isNonEmptyString = Boolean(str) && typeof str === 'string';
    return !isNonEmptyString || str.trim().length === 0;
}
exports.isFalsyOrWhitespace = isFalsyOrWhitespace;
/**
 * Left-pads the string form of `n` with `char` until it is at least `l` characters long.
 * Values that are already `l` characters or longer are returned unpadded.
 *
 * @param n value to pad (converted to a string)
 * @param l minimum length of the result
 * @param char fill value, repeated once per missing character (default is '0')
 * @returns the padded string
 */
function pad(n, l, char = '0') {
    const text = '' + n;
    const pieces = [];
    for (let len = text.length; len < l; len++) {
        pieces.push(char);
    }
    // The fill goes first, the original text last.
    pieces.push(text);
    return pieces.join('');
}
exports.pad = pad;
const _formatRegexp = /{(\d+)}/g;
/**
 * Builds a string from a template and a variable number of arguments. Each `{n}` in the
 * template is replaced by the n-th argument following the template (zero-based).
 * Placeholders whose index has no matching argument are left untouched, and the template
 * is returned as is when no arguments are passed at all.
 * @param value string to which formatting is applied
 * @param args replacements for {n}-entries
 */
function format(value, ...args) {
    if (args.length === 0) {
        return value;
    }
    return value.replace(_formatRegexp, (placeholder, digits) => {
        const position = parseInt(digits, 10);
        const inRange = !isNaN(position) && position >= 0 && position < args.length;
        return inRange ? args[position] : placeholder;
    });
}
exports.format = format;
/**
 * Converts the HTML-significant characters `<`, `>` and `&` inside the string to entities.
 * This makes the string safe to use as element content, e.g. via HTMLElement.innerHTML.
 * Quotes are NOT escaped, so the result must not be placed inside an attribute value.
 *
 * @param html text to escape
 * @returns the text with `<`, `>` and `&` replaced by `&lt;`, `&gt;` and `&amp;`
 */
function escape(html) {
    // Inside a character class `|` is a literal, not an alternation, so only the three
    // characters that are actually replaced are listed.
    return html.replace(/[<>&]/g, function (match) {
        switch (match) {
            case '<': return '&lt;';
            case '>': return '&gt;';
            case '&': return '&amp;';
            default: return match;
        }
    });
}
exports.escape = escape;
/**
 * Prefixes every regular-expression metacharacter (and every whitespace character)
 * in `value` with a backslash so the result matches literally inside a RegExp.
 *
 * @param value text to escape
 * @returns the escaped text
 */
function escapeRegExpCharacters(value) {
    const special = /[\-\\\{\}\*\+\?\|\^\$\.\,\[\]\(\)\#\s]/g;
    return value.replace(special, (ch) => '\\' + ch);
}
exports.escapeRegExpCharacters = escapeRegExpCharacters;
/**
 * Strips every leading and trailing occurrence of needle from haystack.
 * The leading side is handled by `ltrim`, the trailing side by `rtrim`.
 * @param haystack string to trim
 * @param needle the thing to trim (default is a blank)
 */
function trim(haystack, needle = ' ') {
    return rtrim(ltrim(haystack, needle), needle);
}
exports.trim = trim;
/**
 * Strips every consecutive occurrence of needle from the start of haystack.
 * Falsy or empty inputs are returned unchanged.
 * @param haystack string to trim
 * @param needle the thing to trim
 */
function ltrim(haystack, needle) {
    if (!haystack || !needle) {
        return haystack;
    }
    const step = needle.length;
    if (step === 0 || haystack.length === 0) {
        return haystack;
    }
    // Advance one needle at a time for as long as the needle sits exactly at `start`.
    let start = 0;
    while (haystack.indexOf(needle, start) === start) {
        start += step;
    }
    return haystack.substring(start);
}
exports.ltrim = ltrim;
/**
 * Removes all consecutive occurrences of needle from the end of haystack.
 * Falsy or empty inputs are returned unchanged.
 * @param haystack string to trim
 * @param needle the thing to trim
 */
function rtrim(haystack, needle) {
    if (!haystack || !needle) {
        return haystack;
    }
    const needleLen = needle.length;
    const haystackLen = haystack.length;
    if (needleLen === 0 || haystackLen === 0) {
        return haystack;
    }
    let end = haystackLen;
    // Only accept an occurrence that starts exactly `needleLen` before the current end.
    // Searching from `end - needleLen` (not `end - 1`) keeps an occurrence that overlaps
    // the already-trimmed tail from ending the loop early, e.g. rtrim('aaaa', 'aa').
    while (end >= needleLen && haystack.lastIndexOf(needle, end - needleLen) === end - needleLen) {
        end -= needleLen;
    }
    return haystack.substring(0, end);
}
exports.rtrim = rtrim;
/**
 * Turns a simple wildcard pattern into regular-expression source: every regex
 * metacharacter except `*` is escaped, then each `*` becomes `.*`.
 *
 * @param pattern wildcard pattern
 * @returns regular-expression source text
 */
function convertSimple2RegExpPattern(pattern) {
    const escaped = pattern.replace(/[\-\\\{\}\+\?\|\^\$\.\,\[\]\(\)\#\s]/g, '\\$&');
    return escaped.replace(/[\*]/g, '.*');
}
exports.convertSimple2RegExpPattern = convertSimple2RegExpPattern;
/**
 * Removes every `*` character from the pattern.
 *
 * @param pattern wildcard pattern
 * @returns the pattern without any asterisks
 */
function stripWildcards(pattern) {
    const asterisk = /\*/g;
    return pattern.replace(asterisk, () => '');
}
exports.stripWildcards = stripWildcards;
/**
 * Checks whether haystack begins with needle, comparing element by element.
 * An empty needle is a prefix of every haystack.
 */
function startsWith(haystack, needle) {
    if (haystack.length < needle.length) {
        return false;
    }
    let pos = 0;
    while (pos < needle.length) {
        if (haystack[pos] !== needle[pos]) {
            return false;
        }
        pos++;
    }
    return true;
}
exports.startsWith = startsWith;
/**
 * Checks whether haystack finishes with needle.
 * An empty needle is a suffix of every haystack.
 */
function endsWith(haystack, needle) {
    const offset = haystack.length - needle.length;
    if (offset > 0) {
        // The needle must sit exactly at the position where a suffix would start.
        return haystack.indexOf(needle, offset) === offset;
    }
    // Same length: only an exact match qualifies. Longer needle: never a suffix.
    return offset === 0 && haystack === needle;
}
exports.endsWith = endsWith;
/**
 * Finds needle in haystack at or after `position`, ignoring case.
 * An exact-case lookup is tried first; only when it fails is a case-insensitive
 * regular expression search performed on the part of haystack after `position`.
 *
 * NOTE(review): because of the exact-case fast path, a later exact-case occurrence wins
 * over an earlier occurrence that differs in case - confirm whether callers rely on
 * getting the first case-insensitive occurrence.
 *
 * @param haystack string to search in
 * @param needle text to look for (treated literally, not as a pattern)
 * @param position index in haystack at which the search starts (default is 0)
 * @returns the index of the match relative to the start of haystack, or -1
 */
function indexOfIgnoreCase(haystack, needle, position = 0) {
    const exact = haystack.indexOf(needle, position);
    if (exact >= 0) {
        return exact;
    }
    const offset = position > 0 ? position : 0;
    const tail = offset > 0 ? haystack.substring(offset) : haystack;
    const relative = tail.search(new RegExp(escapeRegExpCharacters(needle), 'i'));
    // `search` reports an index inside `tail`; shift it back into haystack coordinates.
    return relative < 0 ? -1 : relative + offset;
}
exports.indexOfIgnoreCase = indexOfIgnoreCase;
/**
 * Builds a RegExp from a search string.
 *
 * @param searchString text or pattern to search for; must not be empty
 * @param isRegex when falsy, searchString is escaped so it matches literally
 * @param options supports `wholeWord` (wrap in `\b` where the edge character is a word
 * character), `global` (adds `g`), `matchCase` (when falsy adds `i`) and
 * `multiline` (adds `m`)
 * @returns the compiled regular expression
 * @throws Error when searchString is the empty string
 */
function createRegExp(searchString, isRegex, options = {}) {
    if (searchString === '') {
        throw new Error('Cannot create regex from empty string');
    }
    let source = isRegex
        ? searchString
        : searchString.replace(/[\-\\\{\}\*\+\?\|\^\$\.\,\[\]\(\)\#\s]/g, '\\$&');
    if (options.wholeWord) {
        // `\B` matches inside a one-character string only when that character is not a
        // word character, so a failed test means the edge needs a `\b` anchor.
        const startsWithWordChar = !/\B/.test(source.charAt(0));
        if (startsWithWordChar) {
            source = '\\b' + source;
        }
        const endsWithWordChar = !/\B/.test(source.charAt(source.length - 1));
        if (endsWithWordChar) {
            source = source + '\\b';
        }
    }
    const flags = [
        options.global ? 'g' : '',
        options.matchCase ? '' : 'i',
        options.multiline ? 'm' : ''
    ].join('');
    return new RegExp(source, flags);
}
exports.createRegExp = createRegExp;
/**
 * Tells whether the regular expression can match without consuming any input, which
 * would make a "find next" loop over its matches never advance.
 *
 * Resets `regexp.lastIndex` to 0 before probing.
 *
 * @param regexp regular expression to probe
 * @returns `true` when the expression matches the empty string, `false` otherwise
 */
function regExpLeadsToEndlessLoop(regexp) {
    // Exit early if it's one of these special cases which are meant to match
    // against an empty string
    if (regexp.source === '^' || regexp.source === '^$' || regexp.source === '$') {
        return false;
    }
    // A global/sticky expression starts matching at `lastIndex`; a stale value left over
    // from earlier use would make the probe below fail for the wrong reason.
    regexp.lastIndex = 0;
    // We check against an empty string. If the regular expression doesn't advance
    // (e.g. ends in an endless loop) it will match an empty string.
    const match = regexp.exec('');
    // Coerce so callers always get a boolean rather than `null` when nothing matched.
    return match !== null && regexp.lastIndex === 0;
}
exports.regExpLeadsToEndlessLoop = regExpLeadsToEndlessLoop;
/**
 * Whether this runtime provides `String.prototype.normalize`.
 */
exports.canNormalize = typeof (''.normalize) === 'function';
// Matches any character outside the range U+0000 - U+0080.
const nonAsciiCharactersPattern = /[^\u0000-\u0080]/;
const normalizedCache = new map_1.BoundedLinkedMap(10000); // bounded to 10000 elements
/**
 * Returns the Unicode Normalization Form Canonical Composition (NFC) of a string.
 * Falsy input, or any input when the runtime cannot normalize, is returned unchanged.
 * Strings without characters above U+0080 skip normalization, and results are kept
 * in a cache keyed by the input string.
 *
 * @see {@link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/normalize}
 * @param str string to normalize
 * @returns the normalized string
 */
function normalizeNFC(str) {
    if (!exports.canNormalize || !str) {
        return str;
    }
    const hit = normalizedCache.get(str);
    if (hit) {
        return hit;
    }
    const normalized = nonAsciiCharactersPattern.test(str) ? str.normalize('NFC') : str;
    // Remember the result for fast lookup next time
    normalizedCache.set(str, normalized);
    return normalized;
}
exports.normalizeNFC = normalizeNFC;
/**
 * Finds the index of the first character that is neither a space nor a tab.
 * Yields -1 for an empty string or one made up solely of spaces and tabs.
 */
function firstNonWhitespaceIndex(str) {
    const len = str.length;
    let i = 0;
    while (i < len) {
        const code = str.charCodeAt(i);
        const isBlank = code === 32 /* CharCode.Space */ || code === 9 /* CharCode.Tab */;
        if (!isBlank) {
            return i;
        }
        i++;
    }
    return -1;
}
exports.firstNonWhitespaceIndex = firstNonWhitespaceIndex;
/**
 * Extracts the run of spaces and tabs at the start of the string.
 * A string made up solely of spaces and tabs is returned in full.
 */
function getLeadingWhitespace(str) {
    const len = str.length;
    let end = 0;
    while (end < len) {
        const code = str.charCodeAt(end);
        const isBlank = code === 32 /* CharCode.Space */ || code === 9 /* CharCode.Tab */;
        if (!isBlank) {
            return str.substring(0, end);
        }
        end++;
    }
    return str;
}
exports.getLeadingWhitespace = getLeadingWhitespace;
/**
 * Finds the index of the last character that is neither a space nor a tab,
 * scanning backwards from `startIndex` (default is the last character).
 * Yields -1 for an empty string or when only spaces and tabs are found.
 */
function lastNonWhitespaceIndex(str, startIndex = str.length - 1) {
    let i = startIndex;
    while (i >= 0) {
        const code = str.charCodeAt(i);
        const isBlank = code === 32 /* CharCode.Space */ || code === 9 /* CharCode.Tab */;
        if (!isBlank) {
            return i;
        }
        i--;
    }
    return -1;
}
exports.lastNonWhitespaceIndex = lastNonWhitespaceIndex;
/**
 * Three-way comparison using the `<` and `>` operators.
 *
 * @returns -1 when a sorts before b, 1 when a sorts after b, 0 otherwise
 */
function compare(a, b) {
    if (a < b) {
        return -1;
    }
    return a > b ? 1 : 0;
}
exports.compare = compare;
/**
 * Compares two strings while ignoring case.
 *
 * Characters are compared pairwise. Pairs of ASCII letters are compared after folding
 * both to lower case; as soon as a differing pair involves anything other than an ASCII
 * letter, the decision is delegated to `compare` on the fully lower-cased strings.
 * When one string is a case-insensitive prefix of the other, the shorter one sorts first.
 *
 * @param a first string
 * @param b second string
 * @returns a negative number when a sorts before b, a positive number when a sorts
 * after b, and 0 when both are equal ignoring case
 */
function compareIgnoreCase(a, b) {
    const len = Math.min(a.length, b.length);
    for (let i = 0; i < len; i++) {
        const codeA = a.charCodeAt(i);
        const codeB = b.charCodeAt(i);
        if (codeA === codeB) {
            continue;
        }
        if (!isAsciiLetter(codeA) || !isAsciiLetter(codeB)) {
            return compare(a.toLowerCase(), b.toLowerCase());
        }
        // Both are ASCII letters: setting bit 0x20 maps 'A'-'Z' onto 'a'-'z' and leaves
        // lower-case letters untouched, so the difference no longer depends on case
        // (comparing raw codes would wrongly put 'a' after 'B').
        const diff = (codeA | 0x20) - (codeB | 0x20);
        if (diff !== 0) {
            return diff;
        }
    }
    if (a.length < b.length) {
        return -1;
    }
    if (a.length > b.length) {
        return 1;
    }
    return 0;
}
exports.compareIgnoreCase = compareIgnoreCase;
/**
 * Tells whether the character code is an ASCII letter, i.e. 'a'-'z' or 'A'-'Z'.
 */
function isAsciiLetter(code) {
    const isLower = code >= 97 /* CharCode.a */ && code <= 122 /* CharCode.z */;
    const isUpper = code >= 65 /* CharCode.A */ && code <= 90 /* CharCode.Z */;
    return isLower || isUpper;
}
/**
 * Tells whether two strings are equal when case is ignored.
 * Strings of different length are never equal. Differing pairs of ASCII letters are
 * equal only when they are the two cases of one letter; every other differing pair is
 * compared after `toLocaleLowerCase()`.
 *
 * @param a first string
 * @param b second string
 * @returns `true` when both strings match ignoring case
 */
function equalsIgnoreCase(a, b) {
    const length = a.length;
    if (length !== b.length) {
        return false;
    }
    for (let i = 0; i < length; i++) {
        const codeA = a.charCodeAt(i);
        const codeB = b.charCodeAt(i);
        if (codeA === codeB) {
            continue;
        }
        if (isAsciiLetter(codeA) && isAsciiLetter(codeB)) {
            // The codes differ here, so the pair only matches when exactly 32 apart,
            // which is the distance between the upper and lower case of one letter.
            if (Math.abs(codeA - codeB) !== 32) {
                return false;
            }
            continue;
        }
        const lowerA = String.fromCharCode(codeA).toLocaleLowerCase();
        const lowerB = String.fromCharCode(codeB).toLocaleLowerCase();
        if (lowerA !== lowerB) {
            return false;
        }
    }
    return true;
}
exports.equalsIgnoreCase = equalsIgnoreCase;
/**
 * Counts how many leading UTF-16 code units the two strings share.
 * @returns the length of the common prefix of the two strings.
 */
function commonPrefixLength(a, b) {
    const limit = Math.min(a.length, b.length);
    for (let idx = 0; idx < limit; idx += 1) {
        if (a.charCodeAt(idx) !== b.charCodeAt(idx)) {
            return idx;
        }
    }
    return limit;
}
exports.commonPrefixLength = commonPrefixLength;
/**
 * Counts how many trailing UTF-16 code units the two strings share.
 * @returns the length of the common suffix of the two strings.
 */
function commonSuffixLength(a, b) {
    const limit = Math.min(a.length, b.length);
    let aPos = a.length - 1;
    let bPos = b.length - 1;
    // Walk both strings backwards in lockstep until the first mismatch.
    for (let matched = 0; matched < limit; matched += 1) {
        if (a.charCodeAt(aPos) !== b.charCodeAt(bPos)) {
            return matched;
        }
        aPos -= 1;
        bPos -= 1;
    }
    return limit;
}
exports.commonSuffixLength = commonSuffixLength;
// --- unicode
// http://en.wikipedia.org/wiki/Surrogate_pair
// Returns the code point starting at a specified index in a string
// Code points U+0000 to U+D7FF and U+E000 to U+FFFF are represented on a single character
// Code points U+10000 to U+10FFFF are represented on two consecutive characters
//export function getUnicodePoint(str:string, index:number, len:number):number {
// let chrCode = str.charCodeAt(index);
// if (0xD800 <= chrCode && chrCode <= 0xDBFF && index + 1 < len) {
// let nextChrCode = str.charCodeAt(index + 1);
// if (0xDC00 <= nextChrCode && nextChrCode <= 0xDFFF) {
// return (chrCode - 0xD800) << 10 + (nextChrCode - 0xDC00) + 0x10000;
// }
// }
// return chrCode;
//}
/**
 * Tells whether the UTF-16 code unit lies in the high (leading) surrogate range
 * U+D800 - U+DBFF.
 */
function isHighSurrogate(charCode) {
    return charCode >= 0xD800 && charCode <= 0xDBFF;
}
exports.isHighSurrogate = isHighSurrogate;
/**
 * Tells whether the UTF-16 code unit lies in the low (trailing) surrogate range
 * U+DC00 - U+DFFF.
 */
function isLowSurrogate(charCode) {
    return charCode >= 0xDC00 && charCode <= 0xDFFF;
}
exports.isLowSurrogate = isLowSurrogate;
/**
 * Pattern matching a single right-to-left character.
 * Generated using https://github.com/alexandrudima/unicode-utils/blob/master/generate-rtl-test.js
 */
const CONTAINS_RTL = /(?:[\u05BE\u05C0\u05C3\u05C6\u05D0-\u05F4\u0608\u060B\u060D\u061B-\u064A\u066D-\u066F\u0671-\u06D5\u06E5\u06E6\u06EE\u06EF\u06FA-\u0710\u0712-\u072F\u074D-\u07A5\u07B1-\u07EA\u07F4\u07F5\u07FA-\u0815\u081A\u0824\u0828\u0830-\u0858\u085E-\u08BD\u200F\uFB1D\uFB1F-\uFB28\uFB2A-\uFD3D\uFD50-\uFDFC\uFE70-\uFEFC]|\uD802[\uDC00-\uDD1B\uDD20-\uDE00\uDE10-\uDE33\uDE40-\uDEE4\uDEEB-\uDF35\uDF40-\uDFFF]|\uD803[\uDC00-\uDCFF]|\uD83A[\uDC00-\uDCCF\uDD00-\uDD43\uDD50-\uDFFF]|\uD83B[\uDC00-\uDEBB])/;
/**
 * Tells whether `str` holds at least one Unicode character classified as "R" or "AL".
 *
 * @param str text to inspect
 * @returns `true` when a right-to-left character is present
 */
function containsRTL(str) {
    const firstRtlMatch = CONTAINS_RTL.exec(str);
    return firstRtlMatch !== null;
}
exports.containsRTL = containsRTL;
const IS_BASIC_ASCII = /^[\t\n\r\x20-\x7E]*$/;
/**
 * Tells whether `str` is made up solely of basic ASCII characters: the printable range
 * 32 - 126 (both inclusive) plus \n, \r and \t. The empty string qualifies.
 *
 * @param str text to inspect
 * @returns `true` when no other character is present
 */
function isBasicASCII(str) {
    const wholeStringMatch = IS_BASIC_ASCII.exec(str);
    return wholeStringMatch !== null;
}
exports.isBasicASCII = isBasicASCII;
/**
 * Tells whether the character code belongs to one of the ranges treated as full width
 * (occupying two columns), see the block table inside the function.
 *
 * @param charCode UTF-16 code unit (coerced to a number)
 * @returns `true` when the code lies in 2E80-D7AF, F900-FAFF or FF01-FF5E
 */
function isFullWidthCharacter(charCode) {
    // Do a cheap trick to better support wrapping of wide characters, treat them as 2 columns
    // http://jrgraphix.net/research/unicode_blocks.php
    // 2E80 — 2EFF CJK Radicals Supplement
    // 2F00 — 2FDF Kangxi Radicals
    // 2FF0 — 2FFF Ideographic Description Characters
    // 3000 — 303F CJK Symbols and Punctuation
    // 3040 — 309F Hiragana
    // 30A0 — 30FF Katakana
    // 3100 — 312F Bopomofo
    // 3130 — 318F Hangul Compatibility Jamo
    // 3190 — 319F Kanbun
    // 31A0 — 31BF Bopomofo Extended
    // 31F0 — 31FF Katakana Phonetic Extensions
    // 3200 — 32FF Enclosed CJK Letters and Months
    // 3300 — 33FF CJK Compatibility
    // 3400 — 4DBF CJK Unified Ideographs Extension A
    // 4DC0 — 4DFF Yijing Hexagram Symbols
    // 4E00 — 9FFF CJK Unified Ideographs
    // A000 — A48F Yi Syllables
    // A490 — A4CF Yi Radicals
    // AC00 — D7AF Hangul Syllables
    // [IGNORE] D800 — DB7F High Surrogates
    // [IGNORE] DB80 — DBFF High Private Use Surrogates
    // [IGNORE] DC00 — DFFF Low Surrogates
    // [IGNORE] E000 — F8FF Private Use Area
    // F900 — FAFF CJK Compatibility Ideographs
    // [IGNORE] FB00 — FB4F Alphabetic Presentation Forms
    // [IGNORE] FB50 — FDFF Arabic Presentation Forms-A
    // [IGNORE] FE00 — FE0F Variation Selectors
    // [IGNORE] FE20 — FE2F Combining Half Marks
    // [IGNORE] FE30 — FE4F CJK Compatibility Forms
    // [IGNORE] FE50 — FE6F Small Form Variants
    // [IGNORE] FE70 — FEFF Arabic Presentation Forms-B
    // FF00 — FFEF Halfwidth and Fullwidth Forms
    // [https://en.wikipedia.org/wiki/Halfwidth_and_fullwidth_forms]
    // of which FF01 - FF5E fullwidth ASCII of 21 to 7E
    // [IGNORE] and FF65 - FFDC halfwidth of Katakana and Hangul
    // [IGNORE] FFF0 — FFFF Specials
    const code = +charCode; // @perf
    if (code >= 0x2E80 && code <= 0xD7AF) {
        return true;
    }
    if (code >= 0xF900 && code <= 0xFAFF) {
        return true;
    }
    return code >= 0xFF01 && code <= 0xFF5E;
}
exports.isFullWidthCharacter = isFullWidthCharacter;
/**
 * Computes a similarity score for two strings. More similar strings have a higher score.
 * The score is the length of the longest common subsequence (LCS) of both strings minus
 * the square root of their length difference, so length differences are penalized.
 * Strings whose lengths differ by more than `maxLenDelta` get the default score 0.
 * Complexity - time O(first.length * second.length), space O(second.length)
 * Dynamic programming LCS computation http://en.wikipedia.org/wiki/Longest_common_subsequence_problem
 *
 * @param first a string
 * @param second a string
 * @param maxLenDelta largest length difference for which a score is computed (default is 4)
 * @returns the similarity score, or 0 when the lengths differ too much
 */
function difference(first, second, maxLenDelta = 4) {
    const lengthDifference = Math.abs(first.length - second.length);
    // We only compute the score if both strings have a similar length.
    if (lengthDifference > maxLenDelta) {
        return 0;
    }
    // Only the previous and the current row of the LCS matrix are needed. They MUST be
    // two distinct arrays: sharing a single row lets the cells of the current row leak
    // into the "previous row" lookups and inflates the result.
    const columns = second.length + 1;
    let previousRow = new Array(columns).fill(0);
    let currentRow = new Array(columns).fill(0);
    for (let i = 1; i <= first.length; ++i) {
        for (let j = 1; j < columns; ++j) {
            if (first[i - 1] === second[j - 1]) {
                currentRow[j] = previousRow[j - 1] + 1;
            }
            else {
                currentRow[j] = Math.max(previousRow[j], currentRow[j - 1]);
            }
        }
        // The finished row becomes the previous one; the old previous row is recycled
        // (column 0 is never written and therefore stays 0).
        const recycled = previousRow;
        previousRow = currentRow;
        currentRow = recycled;
    }
    return previousRow[second.length] - Math.sqrt(lengthDifference);
}
exports.difference = difference;
/**
 * Collects the offset at which every line of the text begins. Lines are separated by
 * \r\n, \r or \n. The first entry is always zero, even for an empty text.
 *
 * @param text text to scan
 * @returns array of line start offsets
 */
function computeLineStarts(text) {
    const lineBreak = /\r\n|\r|\n/g;
    const starts = [0];
    // After each match `lastIndex` points right behind the line break, i.e. at the
    // first character of the following line.
    while (lineBreak.exec(text) !== null) {
        starts.push(lineBreak.lastIndex);
    }
    return starts;
}
exports.computeLineStarts = computeLineStarts;
/**
 * Given a string and a max length returns a shortened version that keeps the end of
 * the text. Shortening happens at word boundaries (`\b`).
 *
 * Text shorter than `n` is returned unchanged. Otherwise segments are counted from the
 * end; the segment that pushes the count above `n` is kept along with everything after
 * it, so the result may be longer than `n`. One leading whitespace character is removed.
 *
 * @param text text to shorten
 * @param n length threshold
 * @returns the shortened text
 */
function lcut(text, n) {
    if (text.length < n) {
        return text;
    }
    const segments = text.split(/\b/);
    let keepFrom = 0;
    let count = 0;
    for (let i = segments.length - 1; i >= 0; i--) {
        count += segments[i].length;
        if (count > n) {
            keepFrom = i;
            break;
        }
    }
    const kept = segments.slice(keepFrom);
    return kept.join(exports.empty).replace(/^\s/, exports.empty);
}
exports.lcut = lcut;
// Escape codes
// http://en.wikipedia.org/wiki/ANSI_escape_code
const EL = /\x1B\x5B[0-2]?K/g; // Erase in line: ESC [ n K with n omitted, 0, 1 or 2
const COLOR_START = /\x1b\[\d+(?:;\d+)*m/g; // Color: one or more ';'-separated parameters, e.g. ESC [ 1;31 m
const COLOR_END = /\x1b\[0?m/g; // Color reset, including the parameterless ESC [ m
/**
 * Strips the "erase in line" and color (SGR) escape sequences from a string.
 * Other escape sequences (e.g. cursor movement) are left untouched.
 *
 * @param str text that may contain ANSI escape codes; falsy values are returned as is
 * @returns the text without the supported escape sequences
 */
function removeAnsiEscapeCodes(str) {
    if (str) {
        str = str.replace(EL, '');
        str = str.replace(COLOR_START, '');
        str = str.replace(COLOR_END, '');
    }
    return str;
}
exports.removeAnsiEscapeCodes = removeAnsiEscapeCodes;
// -- UTF-8 BOM
exports.UTF8_BOM_CHARACTER = String.fromCharCode(65279 /* CharCode.UTF8_BOM */);
/**
 * Tells whether the string begins with the byte order mark character U+FEFF.
 *
 * @param str text to inspect; falsy values are accepted
 * @returns `true` when the first character is the BOM, `false` otherwise (always a
 * boolean, also for empty, null or undefined input)
 */
function startsWithUTF8BOM(str) {
    return Boolean(str) && str.length > 0 && str.charCodeAt(0) === 65279 /* CharCode.UTF8_BOM */;
}
exports.startsWithUTF8BOM = startsWithUTF8BOM;
/**
 * Concatenates two strings while keeping the end of the text. When the combined length
 * exceeds maxLength, the start of `first` is cut off and replaced with '...'; when
 * `second` alone exceeds maxLength, only its last maxLength characters are kept.
 * The '...' marker is added on top of the kept characters.
 *
 * @param first existing text
 * @param second text to append
 * @param maxLength number of characters to keep (not counting the '...' marker)
 * @returns the combined, possibly shortened text
 */
function appendWithLimit(first, second, maxLength) {
    const overflow = first.length + second.length - maxLength;
    const head = overflow > 0 ? '...' + first.substr(overflow) : first;
    const tail = second.length > maxLength ? second.substr(second.length - maxLength) : second;
    return head + tail;
}
exports.appendWithLimit = appendWithLimit;
/**
 * Base64-encodes the URI-component-encoded form of a string.
 * The input is first passed through encodeURIComponent because btoa fails for non
 * Latin 1 values; decoding the result therefore yields the URI-encoded text.
 *
 * @param str text to encode
 * @returns base64 text
 */
function safeBtoa(str) {
    const uriEncoded = encodeURIComponent(str);
    return btoa(uriEncoded);
}
exports.safeBtoa = safeBtoa;
/**
 * Concatenates `count` copies of `s`.
 * A count of zero or less yields the empty string.
 *
 * @param s text to repeat
 * @param count number of copies
 * @returns the repeated text
 */
function repeat(s, count) {
    let out = '';
    let done = 0;
    while (done < count) {
        out += s;
        done++;
    }
    return out;
}
exports.repeat = repeat;
//# sourceMappingURL=strings.js.map