forked from berserker/microblog
Compress and combine emoji data (#5229)
parent
eb5ac23434
commit
fd7f0732fe
22 changed files with 254 additions and 93 deletions
@ -1,22 +0,0 @@ |
||||
// @preval
|
||||
const data = require('emoji-mart/dist/data').default; |
||||
const pick = require('lodash/pick'); |
||||
const values = require('lodash/values'); |
||||
|
||||
const condensedEmojis = Object.keys(data.emojis).map(key => { |
||||
if (!data.emojis[key].short_names[0] === key) { |
||||
throw new Error('The condenser expects the first short_code to be the ' + |
||||
'key. It may need to be rewritten if the emoji change such that this ' + |
||||
'is no longer the case.'); |
||||
} |
||||
return values(pick(data.emojis[key], ['short_names', 'unified', 'search'])); |
||||
}); |
||||
|
||||
// JSON.parse/stringify is to emulate what @preval is doing and avoid any
|
||||
// inconsistent behavior in dev mode
|
||||
module.exports = JSON.parse(JSON.stringify({ |
||||
emojis: condensedEmojis, |
||||
skins: data.skins, |
||||
categories: data.categories, |
||||
short_names: data.short_names, |
||||
})); |
@ -1,16 +0,0 @@ |
||||
const data = require('./emoji_data_compressed'); |
||||
|
||||
// decompress
|
||||
// decompress: turn the positional [short_names, unified, search] tuples
// back into an object keyed by each emoji's first short name.
const emojis = {};

for (const [ short_names, unified, search ] of data.emojis) {
  const [ primaryName ] = short_names;

  emojis[primaryName] = { short_names, unified, search };
}

// The keyed table replaces the compressed array on the same data object.
data.emojis = emojis;

module.exports = data;
@ -1,38 +0,0 @@ |
||||
// @preval
|
||||
// http://www.unicode.org/Public/emoji/5.0/emoji-test.txt
|
||||
|
||||
const emojis = require('./emoji_map.json'); |
||||
const { emojiIndex } = require('emoji-mart'); |
||||
const excluded = ['®', '©', '™']; |
||||
const skins = ['🏻', '🏼', '🏽', '🏾', '🏿']; |
||||
const shortcodeMap = {}; |
||||
|
||||
// Index emoji-mart's entries by their native unicode string so that an id
// can be looked up from the character itself.
for (const name of Object.keys(emojiIndex.emojis)) {
  const entry = emojiIndex.emojis[name];

  shortcodeMap[entry.native] = entry.id;
}
||||
|
||||
// Returns `unicode` with the first occurrence of each skin-tone modifier
// listed in `skins` removed.
const stripModifiers = unicode =>
  skins.reduce((stripped, tone) => stripped.replace(tone, ''), unicode);
||||
|
||||
// Rewrite every emoji map entry in place as [original value, shortcode],
// removing the excluded characters entirely.
for (const key of Object.keys(emojis)) {
  if (excluded.includes(key)) {
    delete emojis[key];
    continue;
  }

  const normalizedKey = stripModifiers(key);

  // Some emoji-mart natives carry a trailing variation selector (U+FE0F),
  // so retry the lookup with it appended.
  const shortcode = shortcodeMap[normalizedKey] || shortcodeMap[normalizedKey + '\uFE0F'];

  emojis[key] = [emojis[key], shortcode];
}

module.exports.unicodeMapping = emojis;
@ -0,0 +1,90 @@ |
||||
// @preval
|
||||
// http://www.unicode.org/Public/emoji/5.0/emoji-test.txt
|
||||
// This file contains the compressed version of the emoji data from
|
||||
// both emoji_map.json and from emoji-mart's emojiIndex and data objects.
|
||||
// It's designed to be emitted in an array format to take up less space
|
||||
// over the wire.
|
||||
|
||||
const { unicodeToFilename } = require('./unicode_to_filename'); |
||||
const { unicodeToUnifiedName } = require('./unicode_to_unified_name'); |
||||
const emojiMap = require('./emoji_map.json'); |
||||
const { emojiIndex } = require('emoji-mart'); |
||||
const emojiMartData = require('emoji-mart/dist/data').default; |
||||
const excluded = ['®', '©', '™']; |
||||
const skins = ['🏻', '🏼', '🏽', '🏾', '🏿']; |
||||
const shortcodeMap = {}; |
||||
|
||||
const shortCodesToEmojiData = {}; |
||||
const emojisWithoutShortCodes = []; |
||||
|
||||
// Map each emoji-mart native unicode string to that emoji's id, which is
// used as its shortcode below.
for (const name of Object.keys(emojiIndex.emojis)) {
  const { native, id } = emojiIndex.emojis[name];

  shortcodeMap[native] = id;
}
||||
|
||||
// Returns `unicode` with the first occurrence of each skin-tone modifier
// listed in `skins` removed.
const stripModifiers = unicode =>
  skins.reduce((stripped, tone) => stripped.replace(tone, ''), unicode);
||||
|
||||
// First pass: walk emoji_map.json and file each emoji's filename data under
// its shortcode, or under emojisWithoutShortCodes when no shortcode is known.
for (const key of Object.keys(emojiMap)) {
  if (excluded.includes(key)) {
    delete emojiMap[key];
    continue;
  }

  const normalizedKey = stripModifiers(key);

  // Retry with a trailing variation selector (U+FE0F) when the bare
  // character is not a known emoji-mart native.
  const shortcode = shortcodeMap[normalizedKey] || shortcodeMap[normalizedKey + '\uFE0F'];

  const filename = emojiMap[key];

  // The filename is only stored when unicodeToFilename can't derive it.
  const filenameData = unicodeToFilename(key) === filename ? [key] : [key, filename];

  if (typeof shortcode === 'undefined') {
    emojisWithoutShortCodes.push(filenameData);
    continue;
  }

  if (!shortCodesToEmojiData[shortcode]) {
    // Slot 0 of every shortcode entry is its list of filename data.
    shortCodesToEmojiData[shortcode] = [[]];
  }

  shortCodesToEmojiData[shortcode][0].push(filenameData);
}
||||
|
||||
Object.keys(emojiIndex.emojis).forEach(key => { |
||||
const { native } = emojiIndex.emojis[key]; |
||||
const { short_names, search, unified } = emojiMartData.emojis[key]; |
||||
if (short_names[0] !== key) { |
||||
throw new Error('The compresser expects the first short_code to be the ' + |
||||
'key. It may need to be rewritten if the emoji change such that this ' + |
||||
'is no longer the case.'); |
||||
} |
||||
|
||||
short_names.splice(0, 1); // first short name can be inferred from the key
|
||||
|
||||
const searchData = [native, short_names, search]; |
||||
if (unicodeToUnifiedName(native) !== unified) { |
||||
// unified name can't be derived from unicodeToUnifiedName
|
||||
searchData.push(unified); |
||||
} |
||||
|
||||
shortCodesToEmojiData[key].push(searchData); |
||||
}); |
||||
|
||||
// JSON.parse/stringify is to emulate what @preval is doing and avoid any
|
||||
// inconsistent behavior in dev mode
|
||||
module.exports = JSON.parse(JSON.stringify([ |
||||
shortCodesToEmojiData, |
||||
emojiMartData.skins, |
||||
emojiMartData.categories, |
||||
emojiMartData.short_names, |
||||
emojisWithoutShortCodes, |
||||
])); |
@ -0,0 +1,41 @@ |
||||
// The output of this module is designed to mimic emoji-mart's
|
||||
// "data" object, such that we can use it for a light version of emoji-mart's
|
||||
// emojiIndex.search functionality.
|
||||
const { unicodeToUnifiedName } = require('./unicode_to_unified_name'); |
||||
const [ shortCodesToEmojiData, skins, categories, short_names ] = require('./emoji_compressed'); |
||||
|
||||
const emojis = {}; |
||||
|
||||
// decompress
|
||||
Object.keys(shortCodesToEmojiData).forEach((shortCode) => { |
||||
let [ |
||||
filenameData, // eslint-disable-line no-unused-vars
|
||||
searchData, |
||||
] = shortCodesToEmojiData[shortCode]; |
||||
let [ |
||||
native, |
||||
short_names, |
||||
search, |
||||
unified, |
||||
] = searchData; |
||||
|
||||
if (!unified) { |
||||
// unified name can be derived from unicodeToUnifiedName
|
||||
unified = unicodeToUnifiedName(native); |
||||
} |
||||
|
||||
short_names = [shortCode].concat(short_names); |
||||
emojis[shortCode] = { |
||||
native, |
||||
search, |
||||
short_names, |
||||
unified, |
||||
}; |
||||
}); |
||||
|
||||
module.exports = { |
||||
emojis, |
||||
skins, |
||||
categories, |
||||
short_names, |
||||
}; |
@ -1,7 +1,7 @@ |
||||
// This code is largely borrowed from:
|
||||
// https://github.com/missive/emoji-mart/blob/bbd4fbe/src/utils/emoji-index.js
|
||||
|
||||
import data from './emoji_data_light'; |
||||
import data from './emoji_mart_data_light'; |
||||
import { getData, getSanitizedData, intersect } from './emoji_utils'; |
||||
|
||||
let index = {}; |
@ -0,0 +1,35 @@ |
||||
// A mapping of unicode strings to an object containing the filename
|
||||
// (i.e. the svg filename) and a shortCode intended to be shown
|
||||
// as a "title" attribute in an HTML element (aka tooltip).
|
||||
|
||||
const [ |
||||
shortCodesToEmojiData, |
||||
skins, // eslint-disable-line no-unused-vars
|
||||
categories, // eslint-disable-line no-unused-vars
|
||||
short_names, // eslint-disable-line no-unused-vars
|
||||
emojisWithoutShortCodes, |
||||
] = require('./emoji_compressed'); |
||||
const { unicodeToFilename } = require('./unicode_to_filename'); |
||||
|
||||
// decompress
|
||||
const unicodeMapping = {}; |
||||
|
||||
/**
 * Records one compressed emoji map entry in `unicodeMapping`.
 *
 * @param {Array} emojiMapData - `[native]` or `[native, filename]`
 * @param {string} [shortCode] - shortcode of the emoji; left undefined for
 *   emojis that have none
 */
function processEmojiMapData(emojiMapData, shortCode) {
  const native = emojiMapData[0];
  const storedFilename = emojiMapData[1];

  unicodeMapping[native] = {
    shortCode,
    // a missing filename can be derived from unicodeToFilename
    filename: storedFilename || unicodeToFilename(native),
  };
}
||||
|
||||
// Emojis that have a shortcode: slot 0 of each entry lists their filename data.
for (const shortCode of Object.keys(shortCodesToEmojiData)) {
  const filenameData = shortCodesToEmojiData[shortCode][0];

  for (const emojiMapData of filenameData) {
    processEmojiMapData(emojiMapData, shortCode);
  }
}

// Emojis without a shortcode are registered with `shortCode` left undefined.
for (const emojiMapData of emojisWithoutShortCodes) {
  processEmojiMapData(emojiMapData);
}

module.exports = unicodeMapping;
@ -1,7 +1,7 @@ |
||||
// This code is largely borrowed from:
|
||||
// https://github.com/missive/emoji-mart/blob/bbd4fbe/src/utils/index.js
|
||||
|
||||
import data from './emoji_data_light'; |
||||
import data from './emoji_mart_data_light'; |
||||
|
||||
const COLONS_REGEX = /^(?:\:([^\:]+)\:)(?:\:skin-tone-(\d)\:)?$/; |
||||
|
@ -0,0 +1,26 @@ |
||||
// taken from:
|
||||
// https://github.com/twitter/twemoji/blob/47732c7/twemoji-generator.js#L848-L866
|
||||
exports.unicodeToFilename = (str) => { |
||||
let result = ''; |
||||
let charCode = 0; |
||||
let p = 0; |
||||
let i = 0; |
||||
while (i < str.length) { |
||||
charCode = str.charCodeAt(i++); |
||||
if (p) { |
||||
if (result.length > 0) { |
||||
result += '-'; |
||||
} |
||||
result += (0x10000 + ((p - 0xD800) << 10) + (charCode - 0xDC00)).toString(16); |
||||
p = 0; |
||||
} else if (0xD800 <= charCode && charCode <= 0xDBFF) { |
||||
p = charCode; |
||||
} else { |
||||
if (result.length > 0) { |
||||
result += '-'; |
||||
} |
||||
result += charCode.toString(16); |
||||
} |
||||
} |
||||
return result; |
||||
}; |
@ -0,0 +1,17 @@ |
||||
function padLeft(str, num) { |
||||
while (str.length < num) { |
||||
str = '0' + str; |
||||
} |
||||
return str; |
||||
} |
||||
|
||||
exports.unicodeToUnifiedName = (str) => { |
||||
let output = ''; |
||||
for (let i = 0; i < str.length; i += 2) { |
||||
if (i > 0) { |
||||
output += '-'; |
||||
} |
||||
output += padLeft(str.codePointAt(i).toString(16).toUpperCase(), 4); |
||||
} |
||||
return output; |
||||
}; |
Loading…
Reference in new issue