added some more documentation
This commit is contained in:
parent
ae7931774d
commit
f72da25396
2 changed files with 8 additions and 0 deletions
|
|
@ -2,6 +2,12 @@ __RLTK__
|
|||
|
||||
An attempt to manually port parts of NLTK to Rust.
|
||||
|
||||
from https://www.nltk.org/api/nltk.lm.html:
|
||||
|
||||
_So as to avoid re-creating the text in memory, both train and vocab are lazy iterators. They are evaluated on demand at training time._
|
||||
|
||||
rltk has the same philosophy: everything is done using iterators (on iterators) on string slices.
|
||||
|
||||
Currently in its infancy (but growing):
|
||||
* rltk::lm::preprocessing::pad_both_ends(\["a","b","c"], 2) -> \["\<s>", "a", "b", "c", "\</s>"]
|
||||
* rltk::util::pad_sequence == same as above with customisation
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@ pub mod distance;
|
|||
/// For example, transforming “rain” to “shine” requires three steps, consisting of two substitutions and one insertion:
|
||||
/// “rain” -> “sain” -> “shin” -> “shine”.
|
||||
/// These operations could have been done in other orders, but at least three steps are needed.
|
||||
///
|
||||
/// substitution cost is (for now at least) hardcoded as 2
|
||||
pub fn edit_distance(s1: &str, s2: &str) -> usize {
|
||||
distance::get_edit_distance_table(s1, s2)[s1.len()][s2.len()].value
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue