added readme

This commit is contained in:
Sander Hautvast 2022-04-29 16:13:28 +02:00
parent b42dab3c80
commit 742800e5cb
2 changed files with 20 additions and 0 deletions

13
README.md Normal file
View file

@ -0,0 +1,13 @@
__RLTK__
An attempt to manually port parts of NLTK to Rust.
Currently in its infancy:
* rltk::lm::preprocessing::pad_both_ends(\["a","b","c"], 2) -> \["\<s>", "a", "b", "c", "\</s>"]
* rltk::util::pad_sequence == same as above with customisation
* rltk::util::pad_sequence_left == same
* rltk::util::pad_sequence_right == same
* rltk::util::ngrams(\["a","b","c"],2) -> \[\["a", "b"], \["b", "c"]]
* rltk::util::bigrams(\["a","b","c"]) == ngrams(..., 2)
* rltk::util::trigrams(\["a","b","c"]) == ngrams(..., 3)

View file

@ -56,6 +56,13 @@ pub fn ngrams<'a>(sequence: impl Iterator<Item=&'a &'a str> + 'a, n: usize) -> i
ngrams::NGramSequenceIter::new(sequence, n)
}
/// Convenience wrapper producing the n-grams of size 2 over `sequence`.
///
/// Equivalent to calling `ngrams(sequence, 2)`: the input iterator is handed
/// to `ngrams::NGramSequenceIter::new` with `n = 2`.
pub fn bigrams<'a>(
    sequence: impl Iterator<Item = &'a &'a str> + 'a,
) -> impl Iterator<Item = impl Iterator<Item = &'a &'a str> + 'a> + 'a {
    const BIGRAM_SIZE: usize = 2;
    ngrams::NGramSequenceIter::new(sequence, BIGRAM_SIZE)
}
/// Convenience wrapper producing the n-grams of size 3 over `sequence`.
///
/// Equivalent to calling `ngrams(sequence, 3)`: the input iterator is handed
/// to `ngrams::NGramSequenceIter::new` with `n = 3`.
pub fn trigrams<'a>(
    sequence: impl Iterator<Item = &'a &'a str> + 'a,
) -> impl Iterator<Item = impl Iterator<Item = &'a &'a str> + 'a> + 'a {
    const TRIGRAM_SIZE: usize = 3;
    ngrams::NGramSequenceIter::new(sequence, TRIGRAM_SIZE)
}
#[cfg(test)]
mod tests {