diff --git a/README.md b/README.md index 599d85c..6aae3c1 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,12 @@ __RLTK__ An attempt to manually port some of nltk to rust. +From https://www.nltk.org/api/nltk.lm.html: + +_So as to avoid re-creating the text in memory, both train and vocab are lazy iterators. They are evaluated on demand at training time._ + +RLTK follows the same philosophy: everything is done using iterators (over iterators) over string slices. + Currently in it's infancy (but growing): * rltk::lm::preprocessing::pad_both_ends(\["a","b","c"], 2) -> "\", "a", "b", "c", "\"] * rltk::util::pad_sequence == same as above with customisation diff --git a/src/metrics/mod.rs b/src/metrics/mod.rs index fabd061..39954c1 100644 --- a/src/metrics/mod.rs +++ b/src/metrics/mod.rs @@ -5,6 +5,8 @@ pub mod distance; /// For example, transforming “rain” to “shine” requires three steps, consisting of two substitutions and one insertion: /// “rain” -> “sain” -> “shin” -> “shine”. /// These operations could have been done in other orders, but at least three steps are needed. +/// +/// Note: the substitution cost is (for now at least) hard-coded as 2, so each substitution adds 2 to the distance. pub fn edit_distance(s1: &str, s2: &str) -> usize { distance::get_edit_distance_table(s1, s2)[s1.len()][s2.len()].value }