Problem Statement: Edit Distance
Edit Distance (also known as Levenshtein distance) is a measure of the difference between two strings. It is the minimum number of operations required to convert one string into another. The operations allowed are:
- Insertion of a character.
- Deletion of a character.
- Substitution of one character for another.
The goal is to determine the minimum number of operations required to transform one string into another.
Example
Let’s consider the two strings:
- Source String (s1) = “kitten”
- Target String (s2) = “sitting”
Step-by-Step Transformation:
- kitten → sitten (Substitute ‘k’ with ‘s’)
- sitten → sittin (Remove ‘e’)
- sittin → sitting (Insert ‘g’)
Thus, the minimum edit distance between “kitten” and “sitting” is 3.
Approach & Algorithm (Dynamic Programming)
The Dynamic Programming (DP) approach to solving the Edit Distance problem works by filling a 2D table where each cell represents the edit distance between two substrings. We fill this table iteratively, starting from the base cases where one string is empty, and then using previously computed values to compute the distances for larger substrings.
Steps:
- Define a 2D array
dp
wheredp[i][j]
represents the edit distance between the firsti
characters of strings1
and the firstj
characters of strings2
. - Initialize the base cases:
- If one string is empty, the distance is the length of the other string (because we need to insert all the characters).
- For each pair of characters
s1[i]
ands2[j]
, determine if they are equal. If they are, no operation is needed, so take the value fromdp[i-1][j-1]
. If they are not equal, take the minimum of the three possible operations: insertion, deletion, and substitution.
Time and Space Complexity
- Time Complexity: O(m * n), where
m
is the length of strings1
andn
is the length of strings2
. This is because we fill a table of sizem x n
. - Space Complexity: O(m * n), for the DP table. However, this can be optimized to O(min(m, n)) by using a 1D array if we only care about the current and previous rows of the table.
Code Implementations
1. C
#include <stdio.h>
#include <string.h>
#include <algorithm>
using namespace std;
int min(int a, int b, int c) {
return std::min(std::min(a, b), c);
}
int editDistance(char *s1, char *s2) {
int m = strlen(s1);
int n = strlen(s2);
int dp[m + 1][n + 1];
for (int i = 0; i <= m; i++) {
for (int j = 0; j <= n; j++) {
if (i == 0) dp[i][j] = j;
else if (j == 0) dp[i][j] = i;
else if (s1[i - 1] == s2[j - 1]) dp[i][j] = dp[i - 1][j - 1];
else dp[i][j] = min(dp[i - 1][j] + 1, dp[i][j - 1] + 1, dp[i - 1][j - 1] + 1);
}
}
return dp[m][n];
}
int main() {
char s1[] = "kitten";
char s2[] = "sitting";
printf("Edit Distance: %d\n", editDistance(s1, s2));
return 0;
}
2. C++
#include <iostream>
#include <vector>
#include <algorithm>
using namespace std;
int editDistance(string s1, string s2) {
int m = s1.size();
int n = s2.size();
vector<vector<int>> dp(m + 1, vector<int>(n + 1));
for (int i = 0; i <= m; i++) {
for (int j = 0; j <= n; j++) {
if (i == 0) dp[i][j] = j;
else if (j == 0) dp[i][j] = i;
else if (s1[i - 1] == s2[j - 1]) dp[i][j] = dp[i - 1][j - 1];
else dp[i][j] = min({dp[i - 1][j] + 1, dp[i][j - 1] + 1, dp[i - 1][j - 1] + 1});
}
}
return dp[m][n];
}
int main() {
string s1 = "kitten", s2 = "sitting";
cout << "Edit Distance: " << editDistance(s1, s2) << endl;
return 0;
}
3. Java
public class EditDistance {
public static int min(int a, int b, int c) {
return Math.min(Math.min(a, b), c);
}
public static int editDistance(String s1, String s2) {
int m = s1.length();
int n = s2.length();
int[][] dp = new int[m + 1][n + 1];
for (int i = 0; i <= m; i++) {
for (int j = 0; j <= n; j++) {
if (i == 0) dp[i][j] = j;
else if (j == 0) dp[i][j] = i;
else if (s1.charAt(i - 1) == s2.charAt(j - 1)) dp[i][j] = dp[i - 1][j - 1];
else dp[i][j] = min(dp[i - 1][j] + 1, dp[i][j - 1] + 1, dp[i - 1][j - 1] + 1);
}
}
return dp[m][n];
}
public static void main(String[] args) {
String s1 = "kitten", s2 = "sitting";
System.out.println("Edit Distance: " + editDistance(s1, s2));
}
}
4. Python
def edit_distance(s1, s2):
m, n = len(s1), len(s2)
dp = [[0] * (n + 1) for _ in range(m + 1)]
for i in range(m + 1):
for j in range(n + 1):
if i == 0:
dp[i][j] = j
elif j == 0:
dp[i][j] = i
elif s1[i - 1] == s2[j - 1]:
dp[i][j] = dp[i - 1][j - 1]
else:
dp[i][j] = min(dp[i - 1][j] + 1, dp[i][j - 1] + 1, dp[i - 1][j - 1] + 1)
return dp[m][n]
# Example usage
s1 = "kitten"
s2 = "sitting"
print("Edit Distance:", edit_distance(s1, s2))
5. C#
using System;
public class EditDistance
{
public static int Min(int a, int b, int c)
{
return Math.Min(Math.Min(a, b), c);
}
public static int EditDistanceAlgorithm(string s1, string s2)
{
int m = s1.Length, n = s2.Length;
int[,] dp = new int[m + 1, n + 1];
for (int i = 0; i <= m; i++)
{
for (int j = 0; j <= n; j++)
{
if (i == 0) dp[i, j] = j;
else if (j == 0) dp[i, j] = i;
else if (s1[i - 1] == s2[j - 1]) dp[i, j] = dp[i - 1, j - 1];
else dp[i, j] = Min(dp[i - 1, j] + 1, dp[i, j - 1] + 1, dp[i - 1, j - 1] + 1);
}
}
return dp[m, n];
}
public static void Main()
{
string s1 = "kitten", s2 = "sitting";
Console.WriteLine("Edit Distance: " + EditDistanceAlgorithm(s1, s2));
}
}
6. JavaScript
function editDistance(s1, s2) {
const m = s1.length, n = s2.length;
const dp = Array.from({ length: m + 1 }, () => Array(n + 1).fill(0));
for (let i = 0; i <= m; i++) {
for (let j = 0; j <= n; j++) {
if (i === 0) dp[i][j] = j;
else if (j === 0) dp[i][j] = i;
else if (s1[i - 1] === s2[j - 1]) dp[i][j] = dp[i - 1][j - 1];
else dp[i][j] = Math.min(dp[i - 1][j] + 1, dp[i][j - 1] + 1, dp[i - 1][j - 1] + 1);
}
}
return dp[m][n];
}
// Example usage
const s1 = "kitten", s2 = "sitting";
console.log("Edit Distance:", editDistance(s1, s2));
Conclusion
The Edit Distance problem is a classical example of dynamic programming. The time and space complexities are quadratic in terms of the lengths of the two strings. By using a 2D DP table, we can solve this problem efficiently for reasonably sized strings.