Answer the question
In order to leave comments, you need to log in
How to find a partial match of strings?
I have a list of strings; here is an example of one:
0 .. tubes were used to burn the steel ladle. Replacement of the funnel 18m 8sl. Spilled completely.
Answer the question
In order to leave comments, you need to log in
Without additional libraries and using the Damerau-Levenshtein distance, you can do something like this:
import re
def get_substrings(string):
    """Split a string into its words.

    A word is a maximal run of ``\\w`` characters (letters, digits and
    underscore); everything else is treated as a separator.

    Args:
        string: The text to split.

    Returns:
        A list of the words in their original order. Separators at the
        start or end of the text, or an empty text, never produce empty
        entries.
    """
    # findall on \w+ yields the same words as splitting on \W+, but without
    # the empty strings that re.split emits around leading/trailing
    # separators. The raw string avoids the invalid "\W" escape warning.
    return re.findall(r"\w+", string)
def get_distance(s1, s2):
    """Return the Damerau-Levenshtein distance between two strings.

    Counts the insertions, deletions, substitutions and swaps of two
    adjacent characters needed to turn ``s1`` into ``s2``.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        The edit distance as a non-negative integer.
    """
    rows, cols = len(s1), len(s2)

    # table[r][c] is the distance between the first r characters of s1 and
    # the first c characters of s2; row 0 and column 0 are the empty prefixes.
    table = [[0] * (cols + 1) for _ in range(rows + 1)]
    for r in range(rows + 1):
        table[r][0] = r
    for c in range(cols + 1):
        table[0][c] = c

    for r in range(1, rows + 1):
        for c in range(1, cols + 1):
            cost = 0 if s1[r - 1] == s2[c - 1] else 1
            best = min(
                table[r - 1][c] + 1,         # deletion
                table[r][c - 1] + 1,         # insertion
                table[r - 1][c - 1] + cost,  # match or substitution
            )
            # The last two characters of both prefixes are each other's
            # mirror image: consider swapping them as a single step.
            swapped = (
                r > 1
                and c > 1
                and s1[r - 1] == s2[c - 2]
                and s1[r - 2] == s2[c - 1]
            )
            if swapped:
                best = min(best, table[r - 2][c - 2] + cost)
            table[r][c] = best

    return table[rows][cols]
def check_substring(search_request, original_text, max_distance):
    """Check whether every word of the request fuzzily occurs in the text.

    Both strings are split into words with ``get_substrings``. A request
    word counts as found when at least one word of the text is within
    ``max_distance`` of it according to ``get_distance``.

    Args:
        search_request: The phrase whose words are looked for.
        original_text: The text that is searched.
        max_distance: Largest distance still accepted as a match.

    Returns:
        True when every word of the request has a match in the text (this
        holds trivially for a request that contains no words); otherwise
        None, which is the value callers of this function already receive
        for "not found".
    """
    # Discard empty tokens so that punctuation at the edges of either string
    # cannot act as a zero-length word that "matches" any short word.
    request_words = [word for word in get_substrings(search_request) if word]
    text_words = [word for word in get_substrings(original_text) if word]

    # Each request word is judged exactly once: any() stops at its first
    # match, so a word that resembles several text words can neither stand
    # in for a different, missing word nor push a counter below zero.
    every_word_found = all(
        any(
            get_distance(wanted, candidate) <= max_distance
            for candidate in text_words
        )
        for wanted in request_words
    )
    if every_word_found:
        return True
    return None
# Demo: look up a short phrase inside a longer log entry.
search_request = 'трубок использовали для прожигания'
original_text = 'трубок использовали для прожигания стальковша.Замена воронки 18м 8сл. Разлита полностью'

result = check_substring(
    search_request=search_request,
    original_text=original_text,
    max_distance=2,
)
# Prints True if the phrase was found, otherwise None.
print(result)
Didn't find what you were looking for?
Ask your question: Ask a Question
731 491 924 answers to any question