Skip to main content

Overview

Pattern matching algorithms efficiently find occurrences of a pattern string within a text string. Each algorithm has specific strengths for different use cases.

KMP (Knuth-Morris-Pratt)

KMP algorithm uses a prefix function to avoid redundant comparisons, achieving O(n + m) time complexity.
Time Complexity
  • Preprocessing: O(m)
  • Search: O(n)
  • Total: O(n + m)
Space Complexity: O(m)Where n = text length, m = pattern length

Prefix Function

string_prefix_function.cpp
template <typename T> vector<int> prefix_function(const T &s) {
    int n = (int)s.size();
    vector<int> lps(n, 0);
    lps[0] = 0;
    int matched = 0;
    for (int pos = 1; pos < n; ++pos) {
        while (matched > 0 && s[pos] != s[matched])
            matched = lps[matched - 1];
        if (s[pos] == s[matched])
            matched++;
        lps[pos] = matched;
    }
    return lps;
}
// Longest prefix which is also suffix
// Time Complexity: O(N), Space Complexity: O(N)

KMP Implementation

string_kmp_std.cpp
template <typename T> vector<int> kmp(const T &text, const T &pattern) {
    int n = (int)text.size(), m = (int)pattern.size();
    vector<int> lcp = prefix_function(pattern), occurrences;
    int matched = 0;
    for (int idx = 0; idx < n; ++idx) {
        while (matched > 0 && text[idx] != pattern[matched])
            matched = lcp[matched - 1];
        if (text[idx] == pattern[matched])
            matched++;
        if (matched == m) {
            occurrences.push_back(idx - matched + 1);
            matched = lcp[matched - 1];
        }
    }
    return occurrences;
}

template <typename T>
vector<int> search_pattern(const T &text, const T &pattern) {
    return kmp(text, pattern);
}

KMP Usage Examples

Rabin-Karp

Rabin-Karp uses rolling hash to efficiently search for patterns. It’s particularly useful for multiple pattern matching.
Time Complexity
  • Preprocessing: O(m)
  • Search: O(n + m) average case, O(nm) worst case
Space Complexity: O(1)

Implementation

string_rabin_karp_std.cpp
template <typename T> vector<int> rabin_karp(const T &text, const T &pattern) {
    int n = (int)text.size(), m = (int)pattern.size();
    hashing<string> txt(text), pat(pattern);
    vector<int> occurrences;
    auto hash_pat = pat.query(0, m - 1);
    for (int i = 0; i < n - m + 1; ++i) {
        auto hash_txt = txt.query(i, i + m - 1);
        if (hash_pat == hash_txt)
            occurrences.push_back(i);
    }
    return occurrences;
}

template <typename T>
vector<int> search_pattern(const T &text, const T &pattern) {
    return rabin_karp(text, pattern);
}

Rabin-Karp Usage

string text = "ABABABAB";
string pattern = "ABA";

vector<int> matches = rabin_karp(text, pattern);
// Returns: {0, 2, 4}

for (int pos : matches) {
    cout << "Match at " << pos << endl;
}

Z-Algorithm

Z-algorithm computes Z-array where Z[i] is the length of the longest substring starting from i that matches the prefix.
Time Complexity: O(n + m)Space Complexity: O(n + m)

Implementation

string_z_algorithm_std.cpp
vector<int> z_algorithm(const string &s) {
    int n = (int)s.size();
    vector<int> z_array(n);
    int left = 0, right = 0;
    z_array[0] = 0;
    for (int idx = 1; idx < n; ++idx) {
        if (idx > right) {
            left = right = idx;
            while (right < n && s[right - left] == s[right])
                right++;
            z_array[idx] = right - left;
            right--;
        } else {
            int k = idx - left;
            if (z_array[k] < right - idx + 1) {
                z_array[idx] = z_array[k];
            } else {
                left = idx;
                while (right < n && s[right - left] == s[right])
                    right++;
                z_array[idx] = right - left;
                right--;
            }
        }
    }
    return z_array;
}

Z-Algorithm Usage

string text = "ABABCABABA";
string pattern = "ABA";

// Combine pattern and text with separator
string combined = pattern + "#" + text;
vector<int> z = z_algorithm(combined);

int m = pattern.size();
vector<int> matches;
for (int i = m + 1; i < combined.size(); i++) {
    if (z[i] == m) {
        matches.push_back(i - m - 1);
    }
}
// matches = {0, 5, 7}

Aho-Corasick

Aho-Corasick algorithm is used for multiple pattern matching. It builds a trie with failure links for efficient searching.
Time Complexity
  • Build: O(Σm) where Σm is sum of all pattern lengths
  • Search: O(n + k) where k is number of matches
Space Complexity: O(Σm × alphabet_size)

Implementation

string_aho_corasick_occurrences.cpp
const int ALPHA = 26;  // alphabet size
const char L = 'a';    // first letter of the alphabet

struct TrieNode {
    int next[ALPHA];
    int fail, fail_out;
    bool end;
    int cnt;
    vector<int> idxs;

    TrieNode() {
        fill(next, next + ALPHA, 0);
        fail_out = fail = 0;
        end = false;
        cnt = 0;
    }
    int &operator[](int idx) { return next[idx]; }
};

class AhoCorasick {
  public:
    int nodes;
    int n;
    vector<TrieNode> trie;
    vector<string> words;

    AhoCorasick(const vector<string> &words) : nodes(0), words(words) {
        trie.emplace_back();
        n = (int)words.size();
        int idx = 0;
        for (const string &word : words)
            insert(word, idx++);
        build_ac();
    }

    vector<vector<int>> search(const string &text) {
        vector<vector<int>> answer(n);
        int state = 0;
        for (int i = 0; i < (int)text.size(); ++i) {
            int c = text[i] - L;
            while (state > 0 && !trie[state][c])
                state = trie[state].fail;
            state = trie[state][c];

            int aux = state;
            while (aux) {
                if (trie[aux].end) {
                    for (const int &idx : trie[aux].idxs) {
                        answer[idx].push_back(i - (int)words[idx].size() + 1);
                    }
                }
                aux = trie[aux].fail_out;
            }
        }
        return answer;
    }

  private:
    void insert(const string &word, int idx) {
        int state = 0;
        for (const char &ch : word) {
            int c = ch - L;
            if (!trie[state][c]) {
                trie.emplace_back();
                trie[state][c] = ++nodes;
            }
            state = trie[state][c];
        }
        trie[state].end = true;
        trie[state].cnt++;
        trie[state].idxs.push_back(idx);
    }

    void build_ac() {
        queue<int> q;
        for (int ch = 0; ch < ALPHA; ++ch) {
            if (trie[0][ch]) {
                trie[trie[0][ch]].fail = 0;
                trie[trie[0][ch]].fail_out = 0;
                q.push(trie[0][ch]);
            }
        }
        while (!q.empty()) {
            int node = q.front();
            q.pop();
            for (int ch = 0; ch < ALPHA; ++ch) {
                if (trie[node][ch]) {
                    int failure = trie[node].fail;
                    while (failure > 0 && !trie[failure][ch])
                        failure = trie[failure].fail;
                    failure = trie[failure][ch];
                    trie[trie[node][ch]].fail = failure;
                    if (trie[failure].end) {
                        trie[trie[node][ch]].fail_out = failure;
                    } else {
                        trie[trie[node][ch]].fail_out = trie[failure].fail_out;
                    }
                    trie[trie[node][ch]].cnt += trie[failure].cnt;
                    q.push(trie[node][ch]);
                }
            }
        }
    }
};

Aho-Corasick Usage

Algorithm Comparison

AlgorithmBest ForProsCons
KMPSingle pattern, repeating prefixesDeterministic O(n), simpleOnly single pattern
Rabin-KarpMultiple patterns, same lengthEasy to implement, flexibleProbabilistic, collisions
Z-AlgorithmPrefix matching, palindromesSimple, versatileNot for multiple patterns
Aho-CorasickMultiple patterns, dictionaryFinds all matches efficientlyComplex, memory intensive

Practice Problems

  1. Implement strStr() (LeetCode 28) - Use KMP
  2. Repeated String Match (LeetCode 686) - Use Rabin-Karp
  3. Find All Anagrams (LeetCode 438) - Use rolling hash
  4. Word Search II (LeetCode 212) - Use Aho-Corasick
  5. Shortest Palindrome (LeetCode 214) - Use KMP or Z-algorithm

See Also

Build docs developers (and LLMs) love