数据结构之前缀树

前缀树,也叫字典树,Trie

前缀树 是一种树形数据结构,用于高效地存储和检索字符串数据集中的键。这一数据结构有相当多的应用情景,例如自动补完和拼写检查。

可以简单地认为,Trie是一种26叉树,每一个节点都有26个子节点,对应a-z。

单词不存储于字典树的节点内,而是字典树从根节点到该节点的路径

在具体实现上,python使用字典性能更好,而C++则是使用数组性能更好

字典树练习题集 https://leetcode-cn.com/tag/trie/problemset/

模板

1
2
3
4
5
6
7
8
root={}
def insert(word):
cur = root
for w in word:
if w not in cur:
cur[w]={}
cur = cur[w]
cur['#']='#'

实现 Trie (前缀树)

模板

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
class Trie:

def __init__(self):
self.root={}

def insert(self, word: str) -> None:
cur=self.root
for w in word: # in 判断键是否在字典中
if w not in cur:
cur[w]={}
cur = cur[w]
cur['#']='#'

def search(self, word: str) -> bool:
cur = self.root
for w in word:
if w not in cur:
return False
cur = cur[w]
return cur.get('#')


def startsWith(self, prefix: str) -> bool:
cur = self.root
for w in prefix:
if w not in cur:
return False
cur = cur[w]
return True

添加与搜索单词 - 数据结构设计

注意if cur[c]=='#':continue,不能忘了

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
class WordDictionary:

def __init__(self):
self.root={}

def addWord(self, word: str) -> None:
cur=self.root
for w in word:
if w not in cur:
cur[w]={}
cur = cur[w]
cur['#']='#'

def search(self, word: str) -> bool:
def dfs(cur,word):
for i,w in enumerate(word):
if w=='.':
for c in cur:
if cur[c]=='#':continue
if dfs(cur[c],word[i+1:]):return True
return False

if w not in cur:
return False
cur=cur[w]
return '#' in cur
return dfs(self.root,word)

词典中最长的单词

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
class Solution:
def longestWord(self, words: List[str]) -> str:
root={}
def insert(word):
cur = root
for w in word:
if w not in cur:
cur[w]={}
cur= cur[w]
cur['#'] = '#'

def helper(word):
'''判断该单词是否由words词典中其他单词逐步添加一个字母组成'''
cur=root
for w in word:
cur=cur[w]
if '#' not in cur:
return False
return True

for w in words:
insert(w)

res=0 # 最大长度
for w in words:
if helper(w):
res=max(res,len(w))

ans=[]
for w in words:
if helper(w) and len(w)==res:
ans.append(w)

ans.sort()
return "" if not ans else ans[0]

字典序排数

字典树+DFS

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
class Solution:
def lexicalOrder(self, n: int) -> List[int]:
root={}
def insert(word):
cur=root
for w in word:
if w not in cur:
cur[w]={}
cur=cur[w]
cur['#']='#'

for i in range(1,n+1):
insert(str(i))

def dfs(root,path):
if '#' in root:
res.append(''.join(path[:]))
if len(root)==1:
return
for r in root:
if r=='#':continue
path.append(r)
dfs(root[r],path)
path.pop()

res=[]
dfs(root,[])
return list(map(int,res))

单词替换

找到一个单词的前缀直接返回,可以保证最短

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
class Solution:
def replaceWords(self, dictionary: List[str], sentence: str) -> str:
root={}
def insert(word):
cur=root
for w in word:
if w not in cur:
cur[w]={}
cur=cur[w]
cur['#']='#'

def match(word):
cur=root
res=''
for w in word:
if w not in cur:
return
cur=cur[w]
res+=w
if cur.get('#'):
return res

for d in dictionary:
insert(d)

res=[]
for word in sentence.split():
tmp=match(word)
if not tmp:
res.append(word)
else:
res.append(tmp)
return ' '.join(res)

实现一个魔法字典

单词比如hello,可以构造候选单词列表:aello……zello,hallo……hzllo,然后在trie里面搜索

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
class MagicDictionary:

def __init__(self):
self.root={}

def buildDict(self, dictionary: List[str]) -> None:
def insert(word):
cur=self.root
for w in word:
if w not in cur:
cur[w]={}
cur=cur[w]
cur['#']='#'

for d in dictionary:
insert(d)

def search(self, searchWord: str) -> bool:
def dfs(word):
cur=self.root
for w in word:
if w not in cur:
return False
cur=cur[w]
return cur.get('#')

searchWord=list(searchWord)
words=[chr(i) for i in range(ord('a'),ord('z')+1)]
for i,w in enumerate(searchWord):
for x in words:
if x!=w:
newWord=searchWord[:i]+[x]+searchWord[i+1:]
if dfs(''.join(newWord)):
return True

return False

键值映射

找到根节点后,遍历多叉树累加val

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
class MapSum:

def __init__(self):
self.root={}

def insert(self, key: str, val: int) -> None:
cur=self.root
for k in key:
if k not in cur:
cur[k]={}
cur=cur[k]
cur['#']=val #!!!

def sum(self, prefix: str) -> int:
cur=self.root
res=[0]
for p in prefix:
if p not in cur:
return 0
cur=cur[p] # 找到根节点

def dfs(root):
if root.get('#'):
res[0]+=root['#']
for r in root:
if isinstance(root[r],dict):
dfs(root[r])

dfs(cur)
return res[0]

面试题 16.02. 单词频率

用cur[‘#’]记录频率

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
class WordsFrequency:

def __init__(self, book: List[str]):
self.root={}
def insert(word):
cur=self.root
for w in word:
if w not in cur:
cur[w]={}
cur=cur[w]
if cur.get('#'):
cur['#']+=1
else:
cur['#']=1
for b in book:
insert(b)

def get(self, word: str) -> int:
cur=self.root
for w in word:
if w not in cur:
return 0
cur=cur[w]
if cur.get('#'):
return cur['#']
else:
return 0
不要打赏,只求关注呀QAQ