博文中会简要介绍Leetcode P0072题目分析及解题思路。
“Edit Distance”是一道非常经典的动态规划问题,这个问题在以前的NLP领域会经常碰到,而动态规划则是非常常用的解决方法。
Given two words word1 and word2, find the minimum number of operations required to convert word1 to word2.
You have the following 3 operations permitted on a word:
- Insert a character
- Delete a character
- Replace a character
Example 1:
Input: word1 = "horse", word2 = "ros" Output: 3 Explanation: horse -> rorse (replace 'h' with 'r') rorse -> rose (remove 'r') rose -> ros (remove 'e')
这道题的核心思路是动态规划。具体的递推表达式如下:
令dp[i][j]表示word1前i个字符变换到word2前j个字符所需要的最少次数,即word1[0:i-1]到word2[0:j-1]的编辑距离。
dp[0][0] = 0
显然两个长度为0的字符串,编辑距离也为0
dp[i][0] = i
从word1[0:i-1]变换到空串,最短编辑距离为i
dp[0][j] = j
从空串变换到word2[0:j-1],最短编辑距离为j
dp[i][j] = dp[i-1][j-1], if word1[i-1] == word2[j-1]
若当前两个字符相等,则无需任何转换即可从word1[i-1]变换到word2[j-1],所以当前编辑距离等于word1[0:i-2]到word2[0:j-2]的编辑距离,即dp[i-1][j-1]
dp[i][j] = min(dp[i-1][j-1], min(dp[i][j-1], dp[i-1][j]))+1, otherwise
若当前两个字符不相等,则显然需要经过转换才能从word1[0:i-1]转换成word2[0:j-1],而一共有三种方式:
1. 替换,则dp[i][j] = dp[i-1][j-1]+1
2. 移除,则dp[i][j] = dp[i-1][j]+1, 即移除了word1[i-1],那么需要word1[0:i-2]需要和word2[0:j-1]的编辑距离
3. 插入,则dp[i][j] = dp[i][j-1]+1, 即插入一个字符在word1[i-1]后面,使其和word2[j-1]匹配,那么需要word1[0:i-1]和word2[0:j-2]的编辑距离
最终dp[i][j]是这三个转换产生的编辑距离中的最小值
以下是Java的题解代码实现。
class Solution {
public int minDistance(String word1, String word2) {
int len1 = word1.length(), len2 = word2.length();
if (len1 == 0 || len2 == 0)
return (len1 == 0)? len2: len1;
int[][] dp = new int[len1+1][len2+1];
for (int i1=0; i1<=len1; ++i1)
dp[i1][0] = i1;
for (int i1=0; i1<=len2; ++i1)
dp[0][i1] = i1;
dp[0][0] = 0;
// DP
for (int i1=1; i1<=len1; ++i1) {
for (int j2=1; j2<=len2; ++j2) {
if (word1.charAt(i1-1) == word2.charAt(j2-1))
dp[i1][j2] = dp[i1-1][j2-1];
else {
dp[i1][j2] = Math.min(dp[i1-1][j2-1], Math.min(dp[i1][j2-1], dp[i1-1][j2]))+1;
}
}
}
return dp[len1][len2];
}
}
以下是C++的题解代码实现。
class Solution {
public:
int minDistance(string word1, string word2) {
int len1 = word1.length(), len2 = word2.length();
if (len1 == 0 || len2 == 0)
return (len1 == 0)? len2: len1;
vector<vector<int>> dp(len1+1, vector<int>(len2+1, 0));
for (int i1=0; i1<=len1; ++i1)
dp[i1][0] = i1;
for (int i1=0; i1<=len2; ++i1)
dp[0][i1] = i1;
dp[0][0] = 0;
// DP
for (int i1=1; i1<=len1; ++i1) {
for (int j2=1; j2<=len2; ++j2) {
if (word1[i1-1] == word2[j2-1])
dp[i1][j2] = dp[i1-1][j2-1];
else {
dp[i1][j2] = min(dp[i1-1][j2-1], min(dp[i1][j2-1], dp[i1-1][j2]))+1;
}
}
}
return dp[len1][len2];
}
};