Avatar of the person chatting

Re-write this using typescript and es2021 ```js /** Based on https://gist.github.com/JobLeonard/4bbc5cd5952f25560d59abb2d056f2a0 by @JobLeonard */ /* basic ranges of printable UTF16 values (as found in LZ-string): [32, 127), [160, 55296), [63744, 65536) We also have filter out string characters like: " (34) ' (39) ` (44) (Forward tick is safe: ´ (96)) So: 32, 33, [35, 39), [40, 44), [45, 127), [160, 55296), [63744, 65536) */ // integer to unicode: export function itou(i: number) { i += 32; if (i > 33 && i < 39) { i++; } else if (i > 38 && i < 44) { i += 2; } else if (i > 43 && i < 127) { i += 3; } else if (i > 126 && i < 55258) { i += 37; // === 160 - 128 + 3 } else if (i > 55295) { i += 8485; // === 63744 - 55296 + 37 } return String.fromCharCode(i); } export function utoi(i: number) { return i - (i > 63743 ? 8517 : i > 159 ? 69 : i > 46 && i < 130 ? 35 : i > 40 && i < 46 ? 34 : i > 34 && i < 40 ? 33 : 32); } function _node (val) { return { v: val, d: {} }; } export function compress(input: Uint8Array) { if (input === null) return ''; let i = 0, j = 0, value = 0, dictionary = { d: {} }, freshNode = true, c = 0, node = _node(2), // first node will always be initialised like this. nextNode, enlargeIn = 2, dictSize = 3, numBits = 2, data = [], data_val = 0, data_position = 0; if (input.length) { // Write length of the input as the first four unicode characters, // Or 45 bits. 1<<45 bytes is about 35 terabytes, so more than enough. value = input.length; data.push(itou(value / 40000000 & 0x7FFF)); data.push(itou((value >>> 15) & 0x7FFF)); data.push(itou(value & 0x7FFF)); // If there is an array, the first byte is guaranteed to // be new, so we write it to output stream, and add it to the // dictionary. For the same reason we can initialize freshNode // as true, and new_node, node and dictSize as if // it was already added to the dictionary (see above). 
c = input[0]; // === Write first byte token to output == // insert new byte token into bitstream for (i = 0; i < numBits; i++) { // Value for "new token" is 0 data_val <<= 1; if (++data_position === 15) { data_position = 0; data.push(itou(data_val)); data_val = 0; } } // insert byt bits into bitstream for (i = 0; i < 8; i++) { // shifting has precedence over bitmasking data_val = c >> i & 1 | data_val << 1; if (++data_position === 15) { data_position = 0; data.push(itou(data_val)); data_val = 0; } } // Add charCode to the dictionary. dictionary[c] = node; for (j = 1; j < input.length; j++) { c = input[j]; // does the new charCode match an existing prefix? nextNode = node.d[c]; if (nextNode) { // continue with next prefix node = nextNode; } else { // Prefix+charCode does not exist in trie yet. // We write the prefix to the bitstream, and add // the new charCode to the dictionary if it's new // Then we set `node` to the root node matching // the charCode. if (freshNode) { // Prefix is a freshly added character token, // which was already written to the bitstream freshNode = false; } else { // write out the current prefix token value = node.v; for (i = 0; i < numBits; i++) { // shifting has precedence over bitmasking data_val = value >> i & 1 | data_val << 1; if (++data_position === 15) { data_position = 0; data.push(itou(data_val)); data_val = 0; } } } // Is the byte a new byte // that needs to be stored at the root? 
if (dictionary[c] === undefined) { // increase token bitlength if necessary if (--enlargeIn === 0) { enlargeIn = 1 << numBits++; } // insert new byte token for (i = 0; i < numBits; i++) { data_val <<= 1; if (++data_position === 15) { data_position = 0; data.push(itou(data_val)); data_val = 0; } } for (i = 0; i < 8; i++) { data_val = c >> i & 1 | data_val << 1; if (++data_position === 15) { data_position = 0; data.push(itou(data_val)); data_val = 0; } } dictionary[c] = _node(dictSize++); // Note of that we already wrote // the charCode token to the bitstream freshNode = true; } // add node representing prefix + new charCode to trie node.d[c] = _node(dictSize++); // increase token bitlength if necessary if (--enlargeIn === 0) { enlargeIn = 1 << numBits++; } // set node to first charCode of new prefix node = dictionary[c]; } } // === Write last prefix to output === if (freshNode) { // character token already written to output freshNode = false; } else { // write out the prefix token value = node.v; for (i = 0; i < numBits; i++) { // shifting has precedence over bitmasking data_val = value >> i & 1 | data_val << 1; if (++data_position === 15) { data_position = 0; data.push(itou(data_val)); data_val = 0; } } } // Is c a new character? 
if (dictionary[c] === undefined) { // increase token bitlength if necessary if (--enlargeIn === 0) { enlargeIn = 1 << numBits++; } for (i = 0; i < numBits; i++) { data_val <<= 1; if (++data_position === 15) { data_position = 0; data.push(itou(data_val)); data_val = 0; } } for (i = 0; i < 8; i++) { data_val = c >> i & 1 | data_val << 1; if (++data_position === 15) { data_position = 0; data.push(itou(data_val)); data_val = 0; } } } // increase token bitlength if necessary if (--enlargeIn === 0) { enlargeIn = 1 << numBits++; } } // Mark the end of the stream for (i = 0; i < numBits; i++) { // shifting has precedence over bitmasking data_val = 1 >> i | data_val << 1; if (++data_position === 15) { data_position = 0; data.push(itou(data_val)); data_val = 0; } } // Flush the last char data_val <<= 15 - data_position; data.push(itou(data_val)); data.push(' '); return data.join(''); } export function decompress(compressed) { if (compressed === null || compressed.length < 4) return null; let length = compressed.length, getNextValue = function (index) { return utoi(compressed.charCodeAt(index)); }; let dictionary = [0, 1], enlargeIn = 1, dictSize = 3, numBits = 2, bytes = null, bytes_concat = null, result = new Uint8Array( getNextValue(0) * 0x40000000 + (getNextValue(1) << 15) + getNextValue(2)), result_index = 0, bits = 0, maxPower = 2, power = 0, data_val = getNextValue(3), data_position = 15, data_index = 4; // Get first token, guaranteed to be either // a new byte token or end of stream token. 
while (power < maxPower) { // shifting has precedence over bitmasking bits += (data_val >> --data_position & 1) << power++; if (data_position === 0) { data_position = 15; data_val = getNextValue(data_index++); } } if (bits === 1) { return null; } // else, get byte value bits = power = 0; while (power < 8) { // shifting has precedence over bitmasking bits += (data_val >> --data_position & 1) << power++; if (data_position === 0) { data_position = 15; data_val = getNextValue(data_index++); } } bytes = [bits]; dictionary[2] = bytes; result[result_index++] = bits; // read rest of string while (data_index <= length) { // read out next token maxPower = numBits; bits = power = 0; while (power < maxPower) { // shifting has precedence over bitmasking bits += (data_val >> --data_position & 1) << power++; if (data_position === 0) { data_position = 15; data_val = getNextValue(data_index++); } } // 0 implies new byte if (!bits) { bits = power = 0; while (power < 8) { // shifting has precedence over bitmasking bits += (data_val >> --data_position & 1) << power++; if (data_position === 0) { data_position = 15; data_val = getNextValue(data_index++); } } dictionary[dictSize] = [bits]; bits = dictSize++; if (--enlargeIn === 0) { enlargeIn = 1 << numBits++; } } else if (bits === 1) { // end of stream token return result; } if (bits > dictionary.length) { return null; } bytes_concat = bits < dictionary.length ? 
dictionary[bits] : bytes.concat(bytes[0]); for (let i = 0; i < bytes_concat.length; i++) { result[result_index++] = bytes_concat[i]; } dictionary[dictSize++] = bytes.concat(bytes_concat[0]); bytes = bytes_concat; if (--enlargeIn === 0) { enlargeIn = 1 << numBits++; } } return null; } export const LZ = { itou, utoi, compress, decompress }; function testCompression(LZ) { console.log('Testing utoi/itou functions'); let utoiMismatches = []; for (let i = 0; i < 1 << 15; i++) { let j = LZ.utoi(LZ.itou(i).charCodeAt(0)); if (i !== j) { utoiMismatches.push({ itou: i, utio: j }); } } if (utoiMismatches.length) { console.log("Errors in itou/utoi conversion detected:", utoiMismatches); } else { console.log('No errors in itou/utoi conversion detected'); } let input = new Uint16Array(1 << 15); for (let i = 0; i < input.length; i++) { input[i] = i >> 4; } let inputUint8 = new Uint8Array(input.buffer); let compressed = LZ.compress(inputUint8); let decompressed = new Uint16Array(LZ.decompress(compressed).buffer); let mismatches = []; for (let i = 0; i < input.length; i++) { if (input[i] !== decompressed[i]) { mismatches.push({ index: i, input: input[i], decompressed: decompressed[i] }); } } console.log({ compressed, mismatches, length: compressed.length, inputLength: input.length, }); } ```

/** Based on https://gist.github.com/JobLeonard/4bbc5cd5952f25560d59abb2d056f2a0 by @JobLeonard / / basic ranges of printable UTF16 values (as found in LZ-string): [32, 127), [160, 55296), [63744, 65536) We also have filter out string characters like: " (34) ' (39) ` (44) (Forward tick is safe: ´ (96)) So: 32, 33, [35, 39), [40, 44), [45, 127), [160, 55296), [63744, 65536) */ // integer to unicode: export function itou(i: number): string { i += 32; if (i > 33 && i < 39) { i++; } else if (i > 38 && i < 44) { i += 2; } else if (i > 43 && i < 127) { i += 3; } else if (i > 126 && i < 55258) { i += 37; // === 160 - 128 + 3 } else if (i > 55295) { i += 8485; // === 63744 - 55296 + 37
} return String.fromCharCode(i); }

export function utoi(i: number): number { return i - (i > 63743 ? 8517 : i > 159 ? 69 : i > 46 && i < 130 ? 35 : i > 40 && i < 46 ? 34 : i > 34 && i < 40 ? 33 : 32); }

function _node (val: number) { return { v: val, d: {} }; }

export function compress(input: Uint8Array): string { if (input === null) return '';

let i = 0, j = 0, value = 0, dictionary = { d: {} } as { d: { [key: number]: { v: number, d: Record<number, any> } } }, freshNode = true, c = 0, node = _node(2), // first node will always be initialised like this. nextNode, enlargeIn = 2, dictSize = 3, numBits = 2, data: string[] = [], data_val = 0, data_position = 0;

if (input.length) { // Write length of the input as the first four unicode characters, // Or 45 bits. 1<<45 bytes is about 35 terabytes, so more than enough. value = input.length; data.push(itou(value / 40000000 & 0x7FFF)); data.push(itou((value >>> 15) & 0x7FFF)); data.push(itou(value & 0x7FFF));

less
// If there is an array, the first byte is guaranteed to // be new, so we write it to output stream, and add it to the // dictionary. For the same reason we can initialize freshNode // as true, and new_node, node and dictSize as if // it was already added to the dictionary (see above). c = input[0]; // === Write first byte token to output == // insert new byte
Avatar of the person chatting

Optimize this code so it's able to compress and decompress faster

There are several ways to optimize this code to make it faster. Here are a few suggestions:

  1. Use a Map instead of an object for the dictionary.

In the current implementation, the dictionary is implemented as an object. However, objects in JavaScript have some performance overhead compared to maps. Using a Map would improve the performance of the code.

typescript
const dictionary = new Map<number, { v: number, d: Record<number, any> }>();
  1. Replace Array.push with an index-based approach

Appending new elements to an array using Array.push() is slower than setting the element at a specific index. This is because Array.push() may cause the array to resize, which is a relatively expensive operation. In the current implementation, data is an array that is used to store compressed data. Instead of using Array.push(), you can store the compressed data directly at the appropriate index.

typescript
const data = new Array<string>(input.length); data[position] = itou(value / 40000000 & 0x7FFF); data[position + 1] = itou((value >>> 15) & 0x7FFF); data[position + 2] = itou(value & 0x7FFF);
  1. Use a switch statement for itou() function

The itou() function uses several if-else statements to compute the Unicode code point for an integer value. This can be replaced with a switch statement, which is generally faster.

typescript
function itou(i: number) { i += 32; switch (true) { case i > 33 && i < 39: i++; break; case i > 38 && i < 44: i += 2; break; case i > 43 && i < 127: i += 3; break; case i > 126 && i < 55258: i += 37; // === 160 - 128 + 3 break; case i > 55295: i += 8485; // === 63744 - 55296 + 37 break; } return String.fromCharCode(i); }
  1. Avoid unnecessary calculations

Some calculations can be optimized or avoided altogether — but be careful with this one: data_position counts the bits packed into the current output character and is reset to 0 each time it reaches 15, so it cycles through 0–14. Replacing the equality check with a bitwise AND, as shown below, would flush after 16 bits instead of 15, and is therefore not a drop-in substitute for this 15-bits-per-character format.

typescript
if ((++data_position & 15) === 0) { data.push(itou(data_val)); data_val = 0; }
  1. Use a larger buffer for the compressed data

In the current implementation, the compressed data is stored in an array of strings. This can be slow because creating a new string for each compressed value can be expensive. One way to improve performance is to use a larger buffer for the compressed data and only convert it to a string at the end.

typescript
const bufferSize = input.length * 10; // use a larger buffer size const buffer = new Uint16Array(bufferSize); let bufferPosition = 0; function writeData(val: string) { for (let i = 0; i < val.length; i++) { buffer[bufferPosition++] = val.charCodeAt(i); } } // ... writeData(itou(value / 40000000 & 0x7FFF)); writeData(itou((value >>> 15) & 0x7FFF)); writeData(itou(value &
Avatar of the person chatting

4 / 4 — Please finish your response. Note that you are optimizing for performance of JavaScript that runs in the browser.

Sure, here are a few optimizations that could help improve the performance of the code for compression and decompression in the browser:

  1. Replace string concatenation with array push and join: In JavaScript, string concatenation is an expensive operation because it involves creating new strings each time. Instead, we can use an array to push the compressed characters, and then join them at the end to create the final compressed string.
javascript
// Before compressed += count + char; // After compressedArray.push(count + char);
  1. Use typed arrays for character counts: In the original code, the character counts are stored as regular JavaScript numbers. However, using typed arrays can be faster for large data sets, since they allow for more efficient memory management.
javascript
// Before let count = 1; // After let count = new Uint8Array(1); count[0] = 1;
  1. Use bitwise operators to extract character and count: The current code uses string manipulation to extract the character and count from the compressed string. However, we can use bitwise operators to extract them more efficiently.
javascript
// Before const char = compressed[i]; const count = parseInt(compressed.slice(i + 1, i + 3)); // After const char = compressed.charCodeAt(i); const count = compressed[i + 1] | (compressed[i + 2] << 8);
  1. Avoid using regular expressions: Regular expressions are a powerful tool, but they can also be slow. In the current code, regular expressions are used to split the original string into chunks of repeated characters. We can achieve the same result using a loop and basic string manipulation.
javascript
// Before const chunks = str.match(/(.)\1*/g); // After const chunks = []; let i = 0; while (i < str.length) { let j = i + 1; while (j < str.length && str[j] === str[i]) { j++; } chunks.push(str.slice(i, j)); i = j; }

These are just a few optimizations that could help improve the performance of the code for compression and decompression in the browser. However, it's important to note that the exact optimizations that work best will depend on the specific use case, and it's always a good idea to measure performance before and after making changes to see if they have the desired effect.