前缀树，也叫字典树，Trie

前缀树 是一种树形数据结构，用于高效地存储和检索字符串数据集中的键。这一数据结构有相当多的应用情景，例如自动补完和拼写检查。

可以简单地认为，Trie是一种26叉树，每一个节点都有26个子节点，对应a-z。

单词不存储于字典树的节点内，而是字典树从根节点到该节点的路径

在具体实现上，python使用字典性能更好，而C++则是使用数组性能更好

字典树练习题集

https://leetcode-cn.com/tag/trie/problemset/

模板

root={}
def insert(word):
    cur = root
    for w in word:
        if w not in cur:
            cur[w]={}
        cur = cur[w]
    cur['#']='#'

实现 Trie (前缀树)

模板

class Trie:

    def __init__(self):
        self.root={}

    def insert(self, word: str) -> None:
        cur=self.root
        for w in word:  # in 判断键是否在字典中
            if w not in cur:
                cur[w]={}
            cur = cur[w]
        cur['#']='#'

    def search(self, word: str) -> bool:
        cur = self.root
        for w in word:
            if w not in cur:
                return False
            cur = cur[w]
        return cur.get('#')


    def startsWith(self, prefix: str) -> bool:
        cur = self.root
        for w in prefix:
            if w not in cur:
                return False
            cur = cur[w]
        return True

添加与搜索单词 - 数据结构设计

注意if cur[c]=='#':continue，不能忘了

class WordDictionary:

    def __init__(self):
        self.root={}

    def addWord(self, word: str) -> None:
        cur=self.root
        for w in word:
            if w not in cur:
                cur[w]={}
            cur = cur[w]
        cur['#']='#'

    def search(self, word: str) -> bool:
        def dfs(cur,word):
            for i,w in enumerate(word):
                if w=='.':
                    for c in cur:
                        if cur[c]=='#':continue
                        if dfs(cur[c],word[i+1:]):return True
                    return False

                if w not in cur:
                    return False
                cur=cur[w]
            return '#' in cur
        return dfs(self.root,word)

词典中最长的单词

class Solution:
    def longestWord(self, words: List[str]) -> str:
        root={}
        def insert(word):
            cur = root
            for w in word:
                if w not in cur:
                    cur[w]={}
                cur= cur[w]
            cur['#'] = '#'
        
        def helper(word):
            '''判断该单词是否由words词典中其他单词逐步添加一个字母组成'''
            cur=root
            for w in word:
                cur=cur[w]
                if '#' not in cur:
                    return False
            return True

        for w in words:
            insert(w)
        
        res=0   # 最大长度
        for w in words:
            if helper(w):
                res=max(res,len(w))
        
        ans=[]
        for w in words:
            if helper(w) and len(w)==res:
                ans.append(w)
        
        ans.sort()
        return "" if not ans else ans[0]

字典序排数

字典树+DFS

class Solution:
    def lexicalOrder(self, n: int) -> List[int]:
        root={}
        def insert(word):
            cur=root
            for w in word:
                if w not in cur:
                    cur[w]={}
                cur=cur[w]
            cur['#']='#'
        
        for i in range(1,n+1):
            insert(str(i))
        
        def dfs(root,path):
            if '#' in root:
                res.append(''.join(path[:]))
                if len(root)==1:
                    return
            for r in root:
                if r=='#':continue
                path.append(r)
                dfs(root[r],path)
                path.pop()
            
        res=[]
        dfs(root,[])
        return list(map(int,res))

单词替换

找到一个单词的前缀直接返回，可以保证最短

class Solution:
    def replaceWords(self, dictionary: List[str], sentence: str) -> str:
        root={}
        def insert(word):
            cur=root
            for w in word:
                if w not in cur:
                    cur[w]={}
                cur=cur[w]
            cur['#']='#'
        
        def match(word):
            cur=root
            res=''
            for w in word:
                if w not in cur:
                    return
                cur=cur[w]
                res+=w
                if cur.get('#'):
                    return res
        
        for d in dictionary:
            insert(d)
        
        res=[]
        for word in sentence.split():
            tmp=match(word)
            if not tmp:
                res.append(word)
            else:
                res.append(tmp)
        return ' '.join(res)

实现一个魔法字典

单词比如hello，可以构造候选单词列表：aello……zello，hallo……hzllo，然后在trie里面搜索

class MagicDictionary:

    def __init__(self):
        self.root={}

    def buildDict(self, dictionary: List[str]) -> None:
        def insert(word):
            cur=self.root
            for w in word:
                if w not in cur:
                    cur[w]={}
                cur=cur[w]
            cur['#']='#'
        
        for d in dictionary:
            insert(d)

    def search(self, searchWord: str) -> bool:
        def dfs(word):
            cur=self.root
            for w in word:
                if w not in cur:
                    return False
                cur=cur[w]
            return cur.get('#')

        searchWord=list(searchWord)
        words=[chr(i) for i in range(ord('a'),ord('z')+1)]
        for i,w in enumerate(searchWord):
            for x in words:
                if x!=w:
                    newWord=searchWord[:i]+[x]+searchWord[i+1:]
                    if dfs(''.join(newWord)):
                        return True
        
        return False

键值映射

找到根节点后，遍历多叉树累加val

class MapSum:

    def __init__(self):
        self.root={}

    def insert(self, key: str, val: int) -> None:
        cur=self.root
        for k in key:
            if k not in cur:
                cur[k]={}
            cur=cur[k]
        cur['#']=val    #!!!

    def sum(self, prefix: str) -> int:
        cur=self.root
        res=[0]
        for p in prefix:
            if p not in cur:
                return 0
            cur=cur[p]  # 找到根节点
        
        def dfs(root):
            if root.get('#'):
                res[0]+=root['#']
            for r in root:
                if isinstance(root[r],dict):
                    dfs(root[r])

        dfs(cur)
        return res[0]

面试题 16.02. 单词频率

用cur[‘#’]记录频率

class WordsFrequency:

    def __init__(self, book: List[str]):
        self.root={}
        def insert(word):
            cur=self.root
            for w in word:
                if w not in cur:
                    cur[w]={}
                cur=cur[w]
            if cur.get('#'):
                cur['#']+=1
            else:  
                cur['#']=1
        for b in book:
            insert(b)

    def get(self, word: str) -> int:
        cur=self.root
        for w in word:
            if w not in cur:
                return 0
            cur=cur[w]
        if cur.get('#'):
            return cur['#']
        else:
            return 0