init
This commit is contained in:
141
resource/datasets/__init__.py
Normal file
141
resource/datasets/__init__.py
Normal file
@@ -0,0 +1,141 @@
|
||||
import difflib
|
||||
import os
|
||||
import typing
|
||||
import urllib.request
|
||||
|
||||
import simplejson as json
|
||||
|
||||
|
||||
class FuzzyDict(dict):
|
||||
"""Provides a dictionary that performs fuzzy lookup"""
|
||||
|
||||
def __init__(self, cutoff: float = 0.6):
|
||||
"""Construct a new FuzzyDict instance
|
||||
|
||||
items is an dictionary to copy items from (optional)
|
||||
cutoff is the match ratio below which matches should not be considered
|
||||
cutoff needs to be a float between 0 and 1 (where zero is no match
|
||||
and 1 is a perfect match)"""
|
||||
super(FuzzyDict, self).__init__()
|
||||
self.cutoff = cutoff
|
||||
|
||||
# short wrapper around some super (dict) methods
|
||||
self._dict_contains = lambda key: super(FuzzyDict, self).__contains__(key)
|
||||
self._dict_getitem = lambda key: super(FuzzyDict, self).__getitem__(key)
|
||||
|
||||
def _search(self, lookfor: typing.Any, stop_on_first: bool = False):
|
||||
"""Returns the value whose key best matches lookfor
|
||||
|
||||
if stop_on_first is True then the method returns as soon
|
||||
as it finds the first item
|
||||
"""
|
||||
|
||||
# if the item is in the dictionary then just return it
|
||||
if self._dict_contains(lookfor):
|
||||
return True, lookfor, self._dict_getitem(lookfor), 1
|
||||
|
||||
# set up the fuzzy matching tool
|
||||
ratio_calc = difflib.SequenceMatcher()
|
||||
ratio_calc.set_seq1(lookfor)
|
||||
|
||||
# test each key in the dictionary
|
||||
best_ratio = 0
|
||||
best_match = None
|
||||
best_key = None
|
||||
for key in self:
|
||||
# if the current key is not a string
|
||||
# then we just skip it
|
||||
try:
|
||||
# set up the SequenceMatcher with other text
|
||||
ratio_calc.set_seq2(key)
|
||||
except TypeError:
|
||||
continue
|
||||
|
||||
# we get an error here if the item to look for is not a
|
||||
# string - if it cannot be fuzzy matched and we are here
|
||||
# this it is definitely not in the dictionary
|
||||
try:
|
||||
# calculate the match value
|
||||
ratio = ratio_calc.ratio()
|
||||
except TypeError:
|
||||
break
|
||||
|
||||
# if this is the best ratio so far - save it and the value
|
||||
if ratio > best_ratio:
|
||||
best_ratio = ratio
|
||||
best_key = key
|
||||
best_match = self._dict_getitem(key)
|
||||
|
||||
if stop_on_first and ratio >= self.cutoff:
|
||||
break
|
||||
|
||||
return best_ratio >= self.cutoff, best_key, best_match, best_ratio
|
||||
|
||||
def __contains__(self, item: typing.Any):
|
||||
if self._search(item, True)[0]:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def __getitem__(self, lookfor: typing.Any):
|
||||
matched, key, item, ratio = self._search(lookfor)
|
||||
|
||||
if not matched:
|
||||
raise KeyError(
|
||||
"'%s'. closest match: '%s' with ratio %.3f"
|
||||
% (str(lookfor), str(key), ratio)
|
||||
)
|
||||
|
||||
return item
|
||||
|
||||
|
||||
__HERE = os.path.abspath(os.path.dirname(__file__))
|
||||
with open(os.path.join(__HERE, "map_filename.json"), "r", encoding="utf8") as f:
|
||||
FILENAMES: FuzzyDict = FuzzyDict()
|
||||
for k, v in json.load(f).items():
|
||||
FILENAMES[k] = v
|
||||
|
||||
with open(os.path.join(__HERE, "city_coordinates.json"), "r", encoding="utf8") as f:
|
||||
COORDINATES: FuzzyDict = FuzzyDict()
|
||||
for k, v in json.load(f).items():
|
||||
COORDINATES[k] = v
|
||||
|
||||
EXTRA = {}
|
||||
|
||||
|
||||
def register_url(asset_url: str):
|
||||
if asset_url:
|
||||
registry = asset_url + "/registry.json"
|
||||
try:
|
||||
contents = urllib.request.urlopen(registry).read()
|
||||
contents = json.loads(contents)
|
||||
except Exception as e:
|
||||
raise e
|
||||
files = {}
|
||||
pinyin_names = set()
|
||||
for name, pinyin in contents["PINYIN_MAP"].items():
|
||||
file_name = contents["FILE_MAP"][pinyin]
|
||||
files[name] = [file_name, "js"]
|
||||
pinyin_names.add(pinyin)
|
||||
|
||||
for key, file_name in contents["FILE_MAP"].items():
|
||||
if key not in pinyin_names:
|
||||
# English names
|
||||
files[key] = [file_name, "js"]
|
||||
|
||||
js_folder_name = contents["JS_FOLDER"]
|
||||
if js_folder_name == "/":
|
||||
js_file_prefix = f"{asset_url}/"
|
||||
else:
|
||||
js_file_prefix = f"{asset_url}/{js_folder_name}/"
|
||||
EXTRA[js_file_prefix] = files
|
||||
|
||||
|
||||
def register_files(asset_files: dict):
|
||||
if asset_files:
|
||||
FILENAMES.update(asset_files)
|
||||
|
||||
|
||||
def register_coords(coords: dict):
|
||||
if coords:
|
||||
COORDINATES.update(coords)
|
||||
Reference in New Issue
Block a user