added readme
This commit is contained in:
parent
b42dab3c80
commit
742800e5cb
2 changed files with 20 additions and 0 deletions
13
README.md
Normal file
13
README.md
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
__RLTK__
|
||||||
|
|
||||||
|
An attempt to manually port some of NLTK to Rust.
|
||||||
|
|
||||||
|
Currently in its infancy:
|
||||||
|
* rltk::lm::preprocessing::pad_both_ends(\["a","b","c"], 2) -> \["\<s>", "a", "b", "c", "\</s>"]
|
||||||
|
* rltk::util::pad_sequence == same as above with customisation
|
||||||
|
* rltk::util::pad_sequence_left == same
|
||||||
|
* rltk::util::pad_sequence_right == same
|
||||||
|
* rltk::util::ngrams(\["a","b","c"],2) -> \[\["a", "b"], \["b", "c"]]
|
||||||
|
* rltk::util::bigrams(\["a","b","c"]) == ngrams(..., 2)
|
||||||
|
* rltk::util::trigrams(\["a","b","c"]) == ngrams(..., 3)
|
||||||
|
|
||||||
|
|
@ -56,6 +56,13 @@ pub fn ngrams<'a>(sequence: impl Iterator<Item=&'a &'a str> + 'a, n: usize) -> i
|
||||||
ngrams::NGramSequenceIter::new(sequence, n)
|
ngrams::NGramSequenceIter::new(sequence, n)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn bigrams<'a>(sequence: impl Iterator<Item=&'a &'a str> + 'a) -> impl Iterator<Item=impl Iterator<Item=&'a &'a str> + 'a> + 'a {
|
||||||
|
ngrams::NGramSequenceIter::new(sequence, 2)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn trigrams<'a>(sequence: impl Iterator<Item=&'a &'a str> + 'a) -> impl Iterator<Item=impl Iterator<Item=&'a &'a str> + 'a> + 'a {
|
||||||
|
ngrams::NGramSequenceIter::new(sequence, 3)
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue