RETURNS tsvector
BEGIN ATOMIC
SELECT to_tsvector('simple',
string_agg(array_to_string(daitch_mokotoff(n), ' '), ' '))
FROM regexp_split_to_table(v_name, '\s+') AS n;
END;
CREATE FUNCTION soundex_tsquery(v_name text) RETURNS tsquery
BEGIN ATOMIC
SELECT string_agg('(' || array_to_string(daitch_mokotoff(n), '|') || ')', '&')::tsquery
FROM regexp_split_to_table(v_name, '\s+') AS n;
END;
CREATE TABLE s (nm text);
CREATE INDEX ix_s_txt ON s USING gin (soundex_tsvector(nm)) WITH (fastupdate = off);
INSERT INTO s (nm) VALUES
('John Doe'),
('Jane Roe'),
('Public John Q.'),
('George Best'),
('John Yamson');
SELECT * FROM s WHERE soundex_tsvector(nm) @@ soundex_tsquery('john');
SELECT * FROM s WHERE soundex_tsvector(nm) @@ soundex_tsquery('jane doe');
SELECT * FROM s WHERE soundex_tsvector(nm) @@ soundex_tsquery('john public');
SELECT * FROM s WHERE soundex_tsvector(nm) @@ soundex_tsquery('besst, giorgio');
SELECT * FROM s WHERE soundex_tsvector(nm) @@ soundex_tsquery('Jameson John');
</programlisting>
<para>
If it is desired to avoid recalculation of soundex codes during index
rechecks, an index on a separate column can be used instead of an index on
an expression. A stored generated column can be used for this; see
<xref linkend="ddl-generated-columns"/>.
</para>
</sect2>
<sect2 id="fuzzystrmatch-levenshtein">
<title>Levenshtein</title>
<para>
This function calculates the Levenshtein distance between two strings:
</para>
<indexterm>
<primary>levenshtein</primary>
</indexterm>
<indexterm>
<primary>levenshtein_less_equal</primary>
</indexterm>
<synopsis>
levenshtein(source text, target text, ins_cost int, del_cost int, sub_cost int) returns int
levenshtein(source text, target text) returns int
levenshtein_less_equal(source text, target text, ins_cost int, del_cost int, sub_cost int, max_d int) returns int
levenshtein_less_equal(source text, target text, max_d int) returns int
</synopsis>
<para>
Both <literal>source</literal> and <literal>target</literal> can be any
non-null string, with a maximum of 255 characters. The cost parameters
specify how much to charge for a character insertion, deletion, or
substitution, respectively. You can omit the cost parameters, as in
the second version of the function; in that case they all default to 1.
</para>
<para>
<function>levenshtein_less_equal</function> is an accelerated version of the
Levenshtein function for use when only small distances are of interest.
If the actual distance is less than or equal to <literal>max_d</literal>,
then