From 742800e5cb3756dbca90686b26378162e1fe4e7e Mon Sep 17 00:00:00 2001 From: Sander Hautvast Date: Fri, 29 Apr 2022 16:13:28 +0200 Subject: [PATCH] added readme --- README.md | 13 +++++++++++++ src/util/mod.rs | 7 +++++++ 2 files changed, 20 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..dafc4ce --- /dev/null +++ b/README.md @@ -0,0 +1,13 @@ +__RLTK__ + +An attempt to manually port some of nltk to rust. + +Currently in its infancy: +* rltk::lm::preprocessing::pad_both_ends(\["a","b","c"], 2) -> \["\<s>", "a", "b", "c", "\</s>"] +* rltk::util::pad_sequence == same as above with customisation +* rltk::util::pad_sequence_left == same +* rltk::util::pad_sequence_right == same +* rltk::util::ngrams(\["a","b","c"],2) -> \[\["a", "b"], \["b", "c"]] +* rltk::util::bigrams(\["a","b","c"]) == ngrams(..., 2) +* rltk::util::trigrams(\["a","b","c"]) == ngrams(..., 3) + diff --git a/src/util/mod.rs b/src/util/mod.rs index db66cfe..fa59681 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -56,6 +56,13 @@ pub fn ngrams<'a>(sequence: impl Iterator + 'a, n: usize) -> i ngrams::NGramSequenceIter::new(sequence, n) } +pub fn bigrams<'a>(sequence: impl Iterator + 'a) -> impl Iterator + 'a> + 'a { + ngrams::NGramSequenceIter::new(sequence, 2) +} + +pub fn trigrams<'a>(sequence: impl Iterator + 'a) -> impl Iterator + 'a> + 'a { + ngrams::NGramSequenceIter::new(sequence, 3) +} #[cfg(test)] mod tests {