added readme
This commit is contained in:
parent
b42dab3c80
commit
742800e5cb
2 changed files with 20 additions and 0 deletions
13
README.md
Normal file
13
README.md
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
__RLTK__
|
||||
|
||||
An attempt to manually port parts of NLTK to Rust.
|
||||
|
||||
Currently in its infancy:
|
||||
* rltk::lm::preprocessing::pad_both_ends(\["a","b","c"], 2) -> \["\<s>", "a", "b", "c", "\</s>"]
|
||||
* rltk::util::pad_sequence == same as above with customisation
|
||||
* rltk::util::pad_sequence_left == same
|
||||
* rltk::util::pad_sequence_right == same
|
||||
* rltk::util::ngrams(\["a","b","c"],2) -> \[\["a", "b"], \["b", "c"]]
|
||||
* rltk::util::bigrams(\["a","b","c"]) == ngrams(..., 2)
|
||||
* rltk::util::trigrams(\["a","b","c"]) == ngrams(..., 3)
|
||||
|
||||
|
|
@ -56,6 +56,13 @@ pub fn ngrams<'a>(sequence: impl Iterator<Item=&'a &'a str> + 'a, n: usize) -> i
|
|||
ngrams::NGramSequenceIter::new(sequence, n)
|
||||
}
|
||||
|
||||
pub fn bigrams<'a>(sequence: impl Iterator<Item=&'a &'a str> + 'a) -> impl Iterator<Item=impl Iterator<Item=&'a &'a str> + 'a> + 'a {
|
||||
ngrams::NGramSequenceIter::new(sequence, 2)
|
||||
}
|
||||
|
||||
pub fn trigrams<'a>(sequence: impl Iterator<Item=&'a &'a str> + 'a) -> impl Iterator<Item=impl Iterator<Item=&'a &'a str> + 'a> + 'a {
|
||||
ngrams::NGramSequenceIter::new(sequence, 3)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue