123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213 |
- """
- Update Emoji.py
- Refreshes OMZ emoji database based on the latest Unicode spec
- """
- import re
- import json
# Handle on the Unicode emoji spec; it is iterated line by line by the parsing
# loop further down. The encoding is pinned to UTF-8 so that the emoji
# characters captured from each line do not depend on the platform's default
# locale encoding.
spec = open("emoji-data.txt", "r", encoding="utf-8")
# Regexes
# regex_emoji matches a line describing one emoji and captures, in order:
# the code points, its type (status), the actual emoji, and its official name
regex_emoji = r"^([\w ].*?\S)\s*;\s*([\w-]+)\s*#\s*(.*?)\s(\S.*).*$"
# regex_group matches a comment line that opens a group or a subgroup and
# captures which of the two it is ("group" or "subgroup"), then its name
regex_group = r"^#\s*(group|subgroup):\s*(.*)$"
# Preamble written verbatim at the top of the generated zsh file: an
# explanatory comment followed by the declarations of the associative arrays
# that the rest of the output fills in.
headers = """
# emoji-char-definitions.zsh - Emoji definitions for oh-my-zsh emoji plugin
#
# This file is auto-generated by update_emoji.py. Do not edit it manually.
#
# This contains the definition for:
# $emoji - which maps character names to Unicode characters
# $emoji_flags - maps country names to Unicode flag characters using region
# indicators
# $emoji_mod - maps modifier components to Unicode characters
# $emoji_groups - a single associative array to avoid cluttering up the
# global namespace, and to allow adding additional group
# definitions at run time. The keys are the group names, and
# the values are whitespace-separated lists of emoji
# character names.
# Main emoji
typeset -gAH emoji
# National flags
typeset -gAH emoji_flags
# Combining modifiers
typeset -gAH emoji_mod
# Emoji groups
typeset -gAH emoji_groups
"""
- #######
- # Adding country codes
- #######
- # This is the only part of this script that relies on an external library
- # (country_converter), and is hence commented out by default.
- # You can uncomment it to have country codes added as aliases for flag
- # emojis. (By default, when you install this extension, country codes are
- # included as aliases, but not if you re-run this script without uncommenting.)
- # Warning: country_converter is very verbose, and will print warnings all over
- # your terminal.
- # import country_converter as coco # pylint: disable=wrong-import-position
- # cc = coco.CountryConverter()
- # def country_iso(_all_names, _omz_name):
- # """ Using the external library country_converter,
- # this function can detect the ISO2 and ISO3 codes
- # of the country. It takes as argument the array
- # with all the names of the emoji, and returns that array."""
- # omz_no_underscore = re.sub(r'_', r' ', _omz_name)
- # iso2 = cc.convert(names=[omz_no_underscore], to='ISO2')
- # if iso2 != 'not found':
- # _all_names.append(iso2)
- # iso3 = cc.convert(names=[omz_no_underscore], to='ISO3')
- # _all_names.append(iso3)
- # return _all_names
- #######
- # Helper functions
- #######
def code_to_omz(_code_points):
    """ Builds the ZSH escape string for one emoji.

    _code_points holds the hexadecimal code point(s) separated by single
    spaces; each of them is emitted prefixed with a backslash and a
    capital U, and the pieces are concatenated without separators. """
    return "".join(r'\U' + hex_code for hex_code in _code_points.split(' '))
def name_to_omz(_name, _group, _subgroup, _status):
    """ Derives the snake_case short name under which an emoji is stored.

    _name is the official emoji name, _group the group it belongs to and
    _status its qualification status. _subgroup is accepted for symmetry
    with the caller but is not consulted. """
    def snake_case(_string):
        """ Applies the substitutions that turn a name into snake_case """
        # NOTE(review): this pattern only matches the literal sequence
        # ".()"; lone dots or parentheses fall through to the underscore
        # substitution below. Confirm whether a character class was meant.
        cleaned = re.sub(r'\.\(\)', r'', _string)
        cleaned = re.sub(r'\&', r'and', cleaned)
        # Anything that is not '#', '*' or a word character becomes '_'
        cleaned = re.sub(r'[^\#\*\w]', r'_', cleaned)
        # Single pass: every non-overlapping '__' pair shrinks to one '_'
        return re.sub(r'__', r'_', cleaned)

    # Flags are named "flag: <country>": only the part after the colon is
    # kept for them. Every other name is converted as a whole.
    name_parts = _name.split(": ")
    is_prefixed_flag = _group == "Flags" and len(name_parts) > 1
    shortname = snake_case(name_parts[1] if is_prefixed_flag else _name)

    # Variants that are not fully qualified get a suffix, so that every
    # emoji combination can be listed, even the ones that are not
    # officially sanctioned and are implemented by, say, only one vendor
    suffix_by_status = {
        "unqualified": "_unqualified",
        "minimally-qualified": "_minimally",
    }
    return shortname + suffix_by_status.get(_status, "")
def increment_name(_shortname):
    """ Increment the short name by 1. If you get, say,
    'woman_detective_unqualified', it returns
    'woman_detective_unqualified_1', and then
    'woman_detective_unqualified_2', etc.

    The whole run of trailing digits is treated as the counter, so that
    '..._19' becomes '..._20' (looking at the last character only would
    produce '..._110'). A name without trailing digits, including the
    empty string, simply gets '_1' appended. """
    # Split the name into its stem and its (possibly empty) numeric tail
    stem = _shortname.rstrip("0123456789")
    counter = _shortname[len(stem):]
    if counter:
        return stem + str(int(counter) + 1)
    return _shortname + "_1"
- ########
- # Going through every line
- ########
# Parsing state: the group and subgroup currently open in the spec, and the
# short name given to the most recently stored emoji
group, subgroup, short_name_buffer = "", "", ""
# Short name, before any numeric suffix was added, of the current run of
# consecutive emoji sharing one name. None means that no emoji was seen yet.
base_name_buffer = None
# One entry per emoji: [omz_codes, status, emoji, omz_name, group, subgroup]
emoji_database = []
# `with` closes the spec even when parsing raises half-way through
with spec:
    for line in spec:
        # First, test if this line opens a group or subgroup
        group_match = re.match(regex_group, line)
        if group_match:
            gr_or_sub, name = group_match.groups()
            if gr_or_sub == "group":
                group = name
            elif gr_or_sub == "subgroup":
                subgroup = name
            continue  # Moving on...
        # Second, test if this line references one emoji
        emoji_match = re.match(regex_emoji, line)
        if emoji_match:
            code_points, status, emoji, name = emoji_match.groups()
            omz_codes = code_to_omz(code_points)
            omz_name = name_to_omz(name, group, subgroup, status)
            # If this emoji has the same shortname as the preceding one,
            # derive the next numbered name from the previous one. The
            # comparison is an exact match against the un-numbered name: a
            # substring test would also fire for unrelated names, e.g. 'cat'
            # right after 'cat_face' would be stored as 'cat_face_1'.
            if omz_name == base_name_buffer:
                omz_name = increment_name(short_name_buffer)
            else:
                base_name_buffer = omz_name
            short_name_buffer = omz_name
            emoji_database.append(
                [omz_codes, status, emoji, omz_name, group, subgroup])
- ########
- # Write to emoji-char-definitions.zsh
- ########
- # Aliases for emojis are retrieved through the DB of Gemoji
- # Retrieved on Aug 9 2019 from the following URL:
- # https://raw.githubusercontent.com/github/gemoji/master/db/emoji.json
# Load the Gemoji database. The file is read as UTF-8 (its keys are emoji
# characters) and closed as soon as it has been parsed.
with open("gemoji_db.json", encoding="utf-8") as gemoji_db:
    j = json.load(gemoji_db)
# Maps an emoji character to the list of aliases Gemoji knows for it
aliases_map = {entry['emoji']: entry['aliases'] for entry in j}
all_omz_names = [emoji_data[3] for emoji_data in emoji_database]
# Set copy of the names above, for constant-time membership tests
known_omz_names = set(all_omz_names)
# Each group accumulates its members as one newline-separated string
emoji_groups = {"fruits": "\n", "vehicles": "\n", "hands": "\n",
                "people": "\n", "animals": "\n", "faces": "\n",
                "flags": "\n"}
# Let's begin writing to this file. `with` closes it even if a write fails,
# and the explicit encoding keeps non-ASCII names independent of the locale.
with open("emoji-char-definitions.zsh", "w", encoding="utf-8") as output:
    output.write(headers)
    # First, write every emoji down
    for (_omz_codes, _status, _emoji,
         _omz_name, _group, _subgroup) in emoji_database:
        # One emoji can be mapped to multiple names (aliases or country codes)
        names_for_this_emoji = [_omz_name]
        # Variable that indicates in which map the emoji will be located.
        # The flag test comes last, so it wins over the component test.
        emoji_map = "emoji"
        if _status == "component":
            emoji_map = "emoji_mod"
        if _group == "Flags":
            emoji_map = "emoji_flags"
            # Adding country codes (Optional, see above)
            # names_for_this_emoji = country_iso(names_for_this_emoji, _omz_name)
        # Add the Gemoji aliases, except those that are already used as the
        # main name of some emoji
        for alias in aliases_map.get(_emoji, ()):
            if alias not in known_omz_names:
                names_for_this_emoji.append(alias)
        # And now we write to the definitions file
        for one_name in names_for_this_emoji:
            output.write(f"{emoji_map}[{one_name}]=$'{_omz_codes}'\n")
        # Storing the emoji in defined subgroups for the next step.
        # Only fully-qualified emoji are listed; the first matching rule wins.
        if _status == "fully-qualified":
            if _subgroup == "food-fruit":
                emoji_groups["fruits"] += f" {_omz_name}\n"
            elif "transport-" in _subgroup:
                emoji_groups["vehicles"] += f" {_omz_name}\n"
            elif "hand-" in _subgroup:
                emoji_groups["hands"] += f" {_omz_name}\n"
            elif "person-" in _subgroup or _subgroup == "family":
                emoji_groups["people"] += f" {_omz_name}\n"
            elif "animal-" in _subgroup:
                emoji_groups["animals"] += f" {_omz_name}\n"
            elif "face-" in _subgroup:
                emoji_groups["faces"] += f" {_omz_name}\n"
            elif _group == "Flags":
                emoji_groups["flags"] += f" {_omz_name}\n"
    # Second, write the subgroups to the end of the file
    for name, string in emoji_groups.items():
        output.write(f'\nemoji_groups[{name}]="{string}"\n')
|