[Keymap] develop updates for Drashna Keymaps (#18472)
parent
34490f098a
commit
5abb125b02
@ -1,304 +0,0 @@
|
||||
// Copyright 2021 Google LLC
|
||||
// Copyright 2021 @filterpaper
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// Original source: https://getreuer.info/posts/keyboards/autocorrection
|
||||
|
||||
#include "autocorrection.h"
|
||||
#include <string.h>
|
||||
|
||||
#if __has_include("autocorrection_data.h")
|
||||
# pragma GCC push_options
|
||||
# pragma GCC optimize("O0")
|
||||
# include "autocorrection_data.h"
|
||||
# ifndef AUTOCORRECTION_MIN_LENGTH
|
||||
# define AUTOCORRECTION_MIN_LENGTH AUTOCORRECT_MIN_LENGTH
|
||||
# endif
|
||||
# ifndef AUTOCORRECTION_MAX_LENGTH
|
||||
# define AUTOCORRECTION_MAX_LENGTH AUTOCORRECT_MAX_LENGTH
|
||||
# endif
|
||||
# define autocorrection_data autocorrect_data
|
||||
# if AUTOCORRECTION_MIN_LENGTH < 4
|
||||
# error Minimum Length is too short and may cause overflows
|
||||
# endif
|
||||
# if DICTIONARY_SIZE > SIZE_MAX
|
||||
# error Dictionary size excees maximum size permitted
|
||||
# endif
|
||||
|
||||
static uint8_t typo_buffer[AUTOCORRECT_MAX_LENGTH] = {KC_SPC};
|
||||
static uint8_t typo_buffer_size = 1;
|
||||
|
||||
/**
|
||||
* @brief function for querying the enabled state of autocorrect
|
||||
*
|
||||
* @return true if enabled
|
||||
* @return false if disabled
|
||||
*/
|
||||
bool autocorrect_is_enabled(void) {
|
||||
return userspace_config.autocorrection;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Enables autocorrect and saves state to eeprom
|
||||
*
|
||||
*/
|
||||
void autocorrect_enable(void) {
|
||||
userspace_config.autocorrection = true;
|
||||
eeconfig_update_user(userspace_config.raw);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Disables autocorrect and saves state to eeprom
|
||||
*
|
||||
*/
|
||||
void autocorrect_disable(void) {
|
||||
userspace_config.autocorrection = false;
|
||||
typo_buffer_size = 0;
|
||||
eeconfig_update_user(userspace_config.raw);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Toggles autocorrect's status and save state to eeprom
|
||||
*
|
||||
*/
|
||||
void autocorrect_toggle(void) {
|
||||
userspace_config.autocorrection = !userspace_config.autocorrection;
|
||||
typo_buffer_size = 0;
|
||||
eeconfig_update_user(userspace_config.raw);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief handler for determining if autocorrect should process keypress
|
||||
*
|
||||
* @param keycode Keycode registered by matrix press, per keymap
|
||||
* @param record keyrecord_t structure
|
||||
* @param typo_buffer_size passed along to allow resetting of autocorrect buffer
|
||||
* @param mods allow processing of mod status
|
||||
* @return true Allow autocorection
|
||||
* @return false Stop processing and escape from autocorrect.
|
||||
*/
|
||||
__attribute__((weak)) bool process_autocorrect_user(uint16_t *keycode, keyrecord_t *record, uint8_t *typo_buffer_size, uint8_t *mods) {
|
||||
// See quantum_keycodes.h for reference on these matched ranges.
|
||||
switch (*keycode) {
|
||||
// Exclude these keycodes from processing.
|
||||
case KC_LSFT:
|
||||
case KC_RSFT:
|
||||
case KC_CAPS:
|
||||
case QK_TO ... QK_ONE_SHOT_LAYER_MAX:
|
||||
case QK_LAYER_TAP_TOGGLE ... QK_LAYER_MOD_MAX:
|
||||
case QK_ONE_SHOT_MOD ... QK_ONE_SHOT_MOD_MAX:
|
||||
return false;
|
||||
|
||||
// Mask for base keycode from shifted keys.
|
||||
case QK_LSFT ... QK_LSFT + 255:
|
||||
case QK_RSFT ... QK_RSFT + 255:
|
||||
if (*keycode >= QK_LSFT && *keycode <= (QK_LSFT + 255)) {
|
||||
*mods |= MOD_LSFT;
|
||||
} else {
|
||||
*mods |= MOD_RSFT;
|
||||
}
|
||||
*keycode &= 0xFF; // Get the basic keycode.
|
||||
return true;
|
||||
# ifndef NO_ACTION_TAPPING
|
||||
// Exclude tap-hold keys when they are held down
|
||||
// and mask for base keycode when they are tapped.
|
||||
case QK_LAYER_TAP ... QK_LAYER_TAP_MAX:
|
||||
# ifdef NO_ACTION_LAYER
|
||||
// Exclude Layer Tap, if layers are disabled
|
||||
// but action tapping is still enabled.
|
||||
return false;
|
||||
# endif
|
||||
case QK_MOD_TAP ... QK_MOD_TAP_MAX:
|
||||
// Exclude hold keycode
|
||||
if (!record->tap.count) {
|
||||
return false;
|
||||
}
|
||||
*keycode &= 0xFF;
|
||||
break;
|
||||
# else
|
||||
case QK_MOD_TAP ... QK_MOD_TAP_MAX:
|
||||
case QK_LAYER_TAP ... QK_LAYER_TAP_MAX:
|
||||
// Exclude if disabled
|
||||
return false;
|
||||
# endif
|
||||
// Exclude swap hands keys when they are held down
|
||||
// and mask for base keycode when they are tapped.
|
||||
case QK_SWAP_HANDS ... QK_SWAP_HANDS_MAX:
|
||||
# ifdef SWAP_HANDS_ENABLE
|
||||
if (*keycode >= 0x56F0 || !record->tap.count) {
|
||||
return false;
|
||||
}
|
||||
*keycode &= 0xFF;
|
||||
break;
|
||||
# else
|
||||
// Exclude if disabled
|
||||
return false;
|
||||
# endif
|
||||
}
|
||||
|
||||
// Disable autocorrect while a mod other than shift is active.
|
||||
if ((*mods & ~MOD_MASK_SHIFT) != 0) {
|
||||
*typo_buffer_size = 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * @brief Hook invoked when autocorrect has found a typo to fix.
 *
 * Weak default implementation that always asks the caller to apply the
 * correction.
 *
 * @param backspaces number of characters to remove
 * @param str pointer to PROGMEM string to replace the mistyped selection with
 * @return true apply correction
 * @return false user handled replacement
 */
__attribute__((weak)) bool apply_autocorrect(uint8_t backspaces, const char *str) {
    // The default hook does not look at its arguments.
    (void)backspaces;
    (void)str;
    return true;
}
|
||||
|
||||
/**
|
||||
* @brief Process handler for autocorrect feature
|
||||
*
|
||||
* @param keycode Keycode registered by matrix press, per keymap
|
||||
* @param record keyrecord_t structure
|
||||
* @return true Continue processing keycodes, and send to host
|
||||
* @return false Stop processing keycodes, and don't send to host
|
||||
*/
|
||||
bool process_autocorrection(uint16_t keycode, keyrecord_t *record) {
|
||||
uint8_t mods = get_mods();
|
||||
# ifndef NO_ACTION_ONESHOT
|
||||
mods |= get_oneshot_mods();
|
||||
# endif
|
||||
|
||||
if ((keycode >= AUTOCORRECT_ON && keycode <= AUTOCORRECT_TOGGLE) && record->event.pressed) {
|
||||
if (keycode == AUTOCORRECT_ON) {
|
||||
autocorrect_enable();
|
||||
} else if (keycode == AUTOCORRECT_OFF) {
|
||||
autocorrect_disable();
|
||||
} else if (keycode == AUTOCORRECT_TOGGLE) {
|
||||
autocorrect_toggle();
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!autocorrect_is_enabled()) {
|
||||
typo_buffer_size = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!record->event.pressed) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// autocorrect keycode verification and extraction
|
||||
if (!process_autocorrect_user(&keycode, record, &typo_buffer_size, &mods)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// keycode buffer check
|
||||
switch (keycode) {
|
||||
case KC_A ... KC_Z:
|
||||
// process normally
|
||||
break;
|
||||
case KC_1 ... KC_0:
|
||||
case KC_TAB ... KC_SEMICOLON:
|
||||
case KC_GRAVE ... KC_SLASH:
|
||||
// Set a word boundary if space, period, digit, etc. is pressed.
|
||||
keycode = KC_SPC;
|
||||
break;
|
||||
case KC_ENTER:
|
||||
// Behave more conservatively for the enter key. Reset, so that enter
|
||||
// can't be used on a word ending.
|
||||
typo_buffer_size = 0;
|
||||
keycode = KC_SPC;
|
||||
break;
|
||||
case KC_BSPC:
|
||||
// Remove last character from the buffer.
|
||||
if (typo_buffer_size > 0) {
|
||||
--typo_buffer_size;
|
||||
}
|
||||
return true;
|
||||
case KC_QUOTE:
|
||||
// Treat " (shifted ') as a word boundary.
|
||||
if ((mods & MOD_MASK_SHIFT) != 0) {
|
||||
keycode = KC_SPC;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
// Clear state if some other non-alpha key is pressed.
|
||||
typo_buffer_size = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Rotate oldest character if buffer is full.
|
||||
if (typo_buffer_size >= AUTOCORRECT_MAX_LENGTH) {
|
||||
memmove(typo_buffer, typo_buffer + 1, AUTOCORRECT_MAX_LENGTH - 1);
|
||||
typo_buffer_size = AUTOCORRECT_MAX_LENGTH - 1;
|
||||
}
|
||||
|
||||
// Append `keycode` to buffer.
|
||||
typo_buffer[typo_buffer_size++] = keycode;
|
||||
// Return if buffer is smaller than the shortest word.
|
||||
if (typo_buffer_size < AUTOCORRECT_MIN_LENGTH) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check for typo in buffer using a trie stored in `autocorrect_data`.
|
||||
uint16_t state = 0;
|
||||
uint8_t code = pgm_read_byte(autocorrect_data + state);
|
||||
for (int8_t i = typo_buffer_size - 1; i >= 0; --i) {
|
||||
uint8_t const key_i = typo_buffer[i];
|
||||
|
||||
if (code & 64) { // Check for match in node with multiple children.
|
||||
code &= 63;
|
||||
for (; code != key_i; code = pgm_read_byte(autocorrect_data + (state += 3))) {
|
||||
if (!code) return true;
|
||||
}
|
||||
// Follow link to child node.
|
||||
state = (pgm_read_byte(autocorrect_data + state + 1) | pgm_read_byte(autocorrect_data + state + 2) << 8);
|
||||
// Check for match in node with single child.
|
||||
} else if (code != key_i) {
|
||||
return true;
|
||||
} else if (!(code = pgm_read_byte(autocorrect_data + (++state)))) {
|
||||
++state;
|
||||
}
|
||||
|
||||
// Stop if `state` becomes an invalid index. This should not normally
|
||||
// happen, it is a safeguard in case of a bug, data corruption, etc.
|
||||
if (state >= DICTIONARY_SIZE) {
|
||||
return true;
|
||||
}
|
||||
|
||||
code = pgm_read_byte(autocorrect_data + state);
|
||||
|
||||
if (code & 128) { // A typo was found! Apply autocorrect.
|
||||
const uint8_t backspaces = (code & 63) + !record->event.pressed;
|
||||
if (apply_autocorrect(backspaces, (char const *)(autocorrect_data + state + 1))) {
|
||||
for (uint8_t i = 0; i < backspaces; ++i) {
|
||||
tap_code(KC_BSPC);
|
||||
}
|
||||
send_string_P((char const *)(autocorrect_data + state + 1));
|
||||
}
|
||||
|
||||
if (keycode == KC_SPC) {
|
||||
typo_buffer[0] = KC_SPC;
|
||||
typo_buffer_size = 1;
|
||||
return true;
|
||||
} else {
|
||||
typo_buffer_size = 0;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
# pragma GCC pop_options
|
||||
#else
|
||||
# pragma message "Warning!!! Autocorrect is not corretly setup!"
|
||||
bool process_autocorrection(uint16_t keycode, keyrecord_t* record) {
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
@ -1,17 +0,0 @@
|
||||
// Copyright 2021 Google LLC
|
||||
// Copyright 2021 @filterpaper
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// Original source: https://getreuer.info/posts/keyboards/autocorrection
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "drashna.h"
|
||||
|
||||
/* Querying and changing the enabled state. */
bool autocorrect_is_enabled(void);
void autocorrect_enable(void);
void autocorrect_disable(void);
void autocorrect_toggle(void);

/* Key processing entry point. */
bool process_autocorrection(uint16_t keycode, keyrecord_t *record);

/* Hooks consulted while a key is being processed. */
bool process_autocorrect_user(uint16_t *keycode, keyrecord_t *record, uint8_t *typo_buffer_size, uint8_t *mods);
bool apply_autocorrect(uint8_t backspaces, const char *str);
|
||||
@ -1 +0,0 @@
|
||||
#include "autocorrect_data.h"
|
||||
@ -1,298 +0,0 @@
|
||||
# Copyright 2021-2022 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Python program to make autocorrection_data.h.
|
||||
|
||||
This program reads "autocorrection_dict.txt" and generates a C source file
|
||||
"autocorrection_data.h" with a serialized trie embedded as an array. Run this
|
||||
program without arguments like
|
||||
|
||||
$ python3 make_autocorrection_data.py
|
||||
|
||||
Or to read from a different typo dict file, pass it as the first argument like
|
||||
|
||||
$ python3 make_autocorrection_data.py dict.txt
|
||||
|
||||
Each line of the dict file defines one typo and its correction with the syntax
|
||||
"typo -> correction". Blank lines or lines starting with '#' are ignored.
|
||||
Example:
|
||||
|
||||
:thier -> their
|
||||
dosen't -> doesn't
|
||||
fitler -> filter
|
||||
lenght -> length
|
||||
ouput -> output
|
||||
widht -> width
|
||||
|
||||
See autocorrection_dict_extra.txt for a larger example.
|
||||
|
||||
For full documentation, see
|
||||
https://getreuer.info/posts/keyboards/autocorrection
|
||||
"""
|
||||
|
||||
import sys
|
||||
import textwrap
|
||||
from typing import Any, Dict, Iterator, List, Tuple
|
||||
|
||||
try:
|
||||
from english_words import english_words_lower_alpha_set as CORRECT_WORDS
|
||||
except ImportError:
|
||||
print('Autocorrection will falsely trigger when a typo is a substring of a '
|
||||
'correctly spelled word. To check for this, install the english_words '
|
||||
'package and rerun this script:\n\n pip install english_words\n')
|
||||
# Use a minimal word list as a fallback.
|
||||
CORRECT_WORDS = ('apparent', 'association', 'available', 'classification',
|
||||
'effect', 'entertainment', 'fantastic', 'information',
|
||||
'integrate', 'international', 'language', 'loosest',
|
||||
'manual', 'nothing', 'provides', 'reference', 'statehood',
|
||||
'technology', 'virtually', 'wealthier', 'wonderful')
|
||||
|
||||
# Numeric keycodes used to encode typo characters in the trie (intended to
# mirror the firmware's KC_A, KC_SPC and KC_QUOT).
KC_A = 4
KC_SPC = 0x2c
KC_QUOT = 0x34

# Maps every character allowed in a typo to the keycode stored in the trie.
TYPO_CHARS = {
    "'": KC_QUOT,
    ':': KC_SPC,  # "Word break" character.
}
# Characters a-z occupy consecutive keycodes starting at KC_A.
for _letter_index in range(ord('z') - ord('a') + 1):
    TYPO_CHARS[chr(ord('a') + _letter_index)] = KC_A + _letter_index
del _letter_index
|
||||
|
||||
|
||||
def parse_file(file_name: str) -> List[Tuple[str, str]]:
    """Parses autocorrections dictionary file.

    Each line of the file defines one typo and its correction with the syntax
    "typo -> correction". Blank lines or lines starting with '#' are ignored.
    The function validates that typos only have characters in TYPO_CHARS and
    that typos are not substrings of other typos (both are fatal errors), and
    warns about duplicate typos, typos shorter than 5 characters, and typos
    that would trigger on one of the CORRECT_WORDS.

    Args:
      file_name: String, path of the autocorrections dictionary.
    Returns:
      List of (typo, correction) tuples.
    """
    autocorrections = []
    typos = set()
    for line_number, typo, correction in parse_file_lines(file_name):
        if typo in typos:
            print(f'Warning:{line_number}: Ignoring duplicate typo: "{typo}"')
            continue

        # Check that `typo` is valid.
        if not all(c in TYPO_CHARS for c in typo):
            print(f'Error:{line_number}: Typo "{typo}" has '
                  'characters other than ' + ''.join(TYPO_CHARS.keys()))
            sys.exit(1)
        for other_typo in typos:
            if typo in other_typo or other_typo in typo:
                print(f'Error:{line_number}: Typos may not be substrings of one '
                      f'another, otherwise the longer typo would never trigger: '
                      f'"{typo}" vs. "{other_typo}".')
                sys.exit(1)
        if len(typo) < 5:
            print(f'Warning:{line_number}: It is suggested that typos are at '
                  f'least 5 characters long to avoid false triggers: "{typo}"')

        # Use the module-level word list, so that installing `english_words`
        # (as the import-failure message advises) actually widens the check.
        check_typo_against_dictionary(typo, line_number, CORRECT_WORDS)

        autocorrections.append((typo, correction))
        typos.add(typo)

    return autocorrections
|
||||
|
||||
|
||||
def make_trie(autocorrections: List[Tuple[str, str]]) -> Dict[str, Any]:
    """Builds a trie keyed on the typos' letters in reverse order.

    Args:
      autocorrections: List of (typo, correction) tuples.
    Returns:
      Dict of dicts representing the trie. The node reached after the last
      (that is, first-typed) letter of a typo holds the (typo, correction)
      tuple under the key 'LEAF'.
    """
    root = {}
    for entry in autocorrections:
        typo = entry[0]
        cursor = root
        # Descend from the final letter of the typo towards its first one,
        # creating nodes as needed.
        for letter in reversed(typo):
            if letter not in cursor:
                cursor[letter] = {}
            cursor = cursor[letter]
        cursor['LEAF'] = (typo, entry[1])

    return root
|
||||
|
||||
|
||||
def parse_file_lines(file_name: str) -> Iterator[Tuple[int, str, str]]:
    """Parses lines read from `file_name` into typo-correction pairs.

    Blank lines and lines starting with '#' are skipped. A line without a
    "->" separator, or with an empty typo, prints an error and exits with
    status 1.

    Args:
      file_name: String, path of the autocorrections dictionary.
    Yields:
      (line_number, typo, correction) tuples, with 1-based line numbers. The
      typo is lowercased and its spaces are replaced by ':'.
    """
    # The context manager closes the file once the generator is exhausted or
    # closed, instead of leaving the handle to the garbage collector.
    with open(file_name, 'rt') as dict_file:
        for line_number, raw_line in enumerate(dict_file, start=1):
            line = raw_line.strip()
            if not line or line[0] == '#':
                continue

            # Parse syntax "typo -> correction", using strip to ignore indenting.
            tokens = [token.strip() for token in line.split('->', 1)]
            if len(tokens) != 2 or not tokens[0]:
                print(f'Error:{line_number}: Invalid syntax: "{line}"')
                sys.exit(1)

            typo, correction = tokens
            typo = typo.lower()  # Force typos to lowercase.
            typo = typo.replace(' ', ':')

            yield line_number, typo, correction
|
||||
|
||||
|
||||
def check_typo_against_dictionary(typo: str, line_number: int, correct_words) -> None:
    """Prints a warning for each correctly spelled word that `typo` would hit.

    A leading or trailing ':' in `typo` is a word break, so it restricts the
    comparison to whole words, word prefixes or word suffixes respectively.

    Args:
      typo: String, the typo, possibly with ':' word-break markers.
      line_number: Int, dictionary line number used in the warning text.
      correct_words: Collection of correctly spelled words to check against.
    """
    anchored_start = typo.startswith(':')
    anchored_end = typo.endswith(':')

    if anchored_start and anchored_end:
        # Whole-word typo: only an exact dictionary word is a conflict.
        if typo[1:-1] in correct_words:
            print(f'Warning:{line_number}: Typo "{typo}" is a correctly spelled dictionary word.')
        return

    def falsely_triggers(word: str) -> bool:
        if anchored_start:
            return word.startswith(typo[1:])
        if anchored_end:
            return word.endswith(typo[:-1])
        return typo in word

    for word in correct_words:
        if falsely_triggers(word):
            print(f'Warning:{line_number}: Typo "{typo}" would falsely trigger on correctly spelled word "{word}".')
|
||||
|
||||
|
||||
def serialize_trie(autocorrections: List[Tuple[str, str]],
                   trie: Dict[str, Any]) -> List[int]:
    """Serializes trie and correction data in a form readable by the C code.

    Three kinds of table entries are produced:
      * leaf: one byte of 128 + backspace count, the ASCII bytes of the part
        of the correction that differs from the typo, and a terminating 0;
      * chain (single child): the keycodes of the chained characters, then 0;
      * branch (multiple children): for each child, its keycode followed by a
        two-byte link, with 64 OR-ed into the first keycode, then 0.

    Args:
      autocorrections: List of (typo, correction) tuples (not read here).
      trie: Dict of dicts.
    Returns:
      List of ints in the range 0-255.
    """
    table = []

    # Traverse the trie depth first, appending one table entry per node/chain.
    def traverse(trie_node: Dict[str, Any]) -> Dict[str, Any]:
        if 'LEAF' in trie_node:  # Leaf trie node.
            typo, correction = trie_node['LEAF']
            ends_on_word_break = typo[-1] == ':'
            typo = typo.strip(':')

            # Length of the prefix shared by the typo and its correction;
            # those characters do not need to be retyped.
            shared = 0
            for typed, wanted in zip(typo, correction):
                if typed != wanted:
                    break
                shared += 1

            backspaces = len(typo) - shared - 1 + ends_on_word_break
            assert 0 <= backspaces <= 63
            payload = [backspaces + 128, *bytes(correction[shared:], 'ascii'), 0]

            entry = {'data': payload, 'links': [], 'byte_offset': 0}
            table.append(entry)
            return entry

        if len(trie_node) == 1:  # Trie node with a single child.
            char, child = next(iter(trie_node.items()))
            entry = {'chars': char, 'byte_offset': 0}

            # Long chains of single-child nodes are common; collapse the whole
            # chain into one entry so it serializes compactly.
            while len(child) == 1 and 'LEAF' not in child:
                char, child = next(iter(child.items()))
                entry['chars'] += char

            table.append(entry)
            entry['links'] = [traverse(child)]
            return entry

        # Trie node with multiple children.
        entry = {'chars': ''.join(sorted(trie_node)), 'byte_offset': 0}
        table.append(entry)
        entry['links'] = [traverse(trie_node[char]) for char in entry['chars']]
        return entry

    traverse(trie)

    def serialize(entry: Dict[str, Any]) -> List[int]:
        links = entry['links']
        if not links:  # Leaf table entry.
            return entry['data']
        if len(links) == 1:  # Chain table entry.
            return [TYPO_CHARS[char] for char in entry['chars']] + [0]

        # Branch table entry; only the first keycode carries the 64 marker.
        encoded = []
        for char, link in zip(entry['chars'], links):
            marker = 0 if encoded else 64
            encoded += [TYPO_CHARS[char] | marker] + encode_link(link)
        return encoded + [0]

    # First pass: entry sizes do not depend on link values, so the byte offset
    # of every entry can be fixed before the links are encoded for real.
    running_offset = 0
    for entry in table:
        entry['byte_offset'] = running_offset
        running_offset += len(serialize(entry))

    # Second pass: serialize the final table with the offsets in place.
    serialized = []
    for entry in table:
        serialized.extend(serialize(entry))
    return serialized
|
||||
|
||||
|
||||
def encode_link(link: Dict[str, Any]) -> List[int]:
    """Encodes a node link as two bytes, low byte first.

    Args:
      link: Table entry dict; its 'byte_offset' is the link target.
    Returns:
      Two-element list [low byte, high byte] of the byte offset. Prints an
      error and exits with status 1 if the offset does not fit in 16 bits.
    """
    target = link['byte_offset']
    if target < 0 or target > 0xffff:
        print('Error: The autocorrection table is too large, a node link exceeds '
              '64KB limit. Try reducing the autocorrection dict to fewer entries.')
        sys.exit(1)
    high_byte, low_byte = divmod(target, 256)
    return [low_byte, high_byte]
|
||||
|
||||
|
||||
def write_generated_code(autocorrections: List[Tuple[str, str]],
                         data: List[int],
                         file_name: str) -> None:
    """Writes autocorrection data as generated C code to `file_name`.

    The output consists of a comment listing the dictionary, the
    AUTOCORRECTION_MIN_LENGTH / AUTOCORRECTION_MAX_LENGTH defines (lengths of
    the shortest and longest typo), and the `autocorrection_data` array.

    Args:
      autocorrections: List of (typo, correction) tuples.
      data: List of ints in 0-255, the serialized trie.
      file_name: String, path of the output C file.
    """
    assert all(0 <= b <= 255 for b in data)

    shortest_typo = min(autocorrections, key=lambda pair: len(pair[0]))[0]
    longest_typo = max(autocorrections, key=lambda pair: len(pair[0]))[0]
    column_width = len(longest_typo)

    # One comment line per entry, typos padded to a common width and sorted.
    dictionary_listing = ''.join(sorted(
        f'// {typo:<{column_width}} -> {correction}\n'
        for typo, correction in autocorrections))

    array_definition = textwrap.fill(
        'static const uint8_t autocorrection_data[%d] PROGMEM = {%s};' % (
            len(data), ', '.join(map(str, data))),
        width=80, subsequent_indent=' ')

    sections = [
        '// Generated code.\n\n',
        f'// Autocorrection dictionary ({len(autocorrections)} entries):\n',
        dictionary_listing,
        f'\n#define AUTOCORRECTION_MIN_LENGTH {len(shortest_typo)} // "{shortest_typo}"\n',
        f'#define AUTOCORRECTION_MAX_LENGTH {len(longest_typo)} // "{longest_typo}"\n\n',
        array_definition,
        '\n\n',
    ]

    with open(file_name, 'wt') as out_file:
        out_file.write(''.join(sections))
|
||||
|
||||
|
||||
def main(argv):
    """Generates autocorrection_data.h from a typo dictionary file.

    Args:
      argv: Command-line arguments; argv[1], when present, is the dictionary
        path. Otherwise 'autocorrection_dict.txt' is read.
    """
    if len(argv) > 1:
        dict_file = argv[1]
    else:
        dict_file = 'autocorrection_dict.txt'

    autocorrections = parse_file(dict_file)
    data = serialize_trie(autocorrections, make_trie(autocorrections))
    print(f'Processed {len(autocorrections)} autocorrection entries '
          f'to table with {len(data)} bytes.')
    write_generated_code(autocorrections, data, 'autocorrection_data.h')


if __name__ == '__main__':
    main(sys.argv)
|
||||
Loading…
Reference in New Issue