From d187c66dde22d3f2f2e972cee7a35da67fc428a2 Mon Sep 17 00:00:00 2001 From: Sambo Chea Date: Tue, 2 Feb 2021 12:21:10 +0700 Subject: [PATCH] Add splitter text docs --- .gitignore | 2 +- .idea/.gitignore | 8 ++++++++ .idea/misc.xml | 6 ++++++ .idea/ml.iml | 17 +++++++++++++++++ .idea/modules.xml | 8 ++++++++ .idea/vcs.xml | 6 ++++++ research/text/splitter/splitter.md | 19 +++++++++++++++++++ research/text/splitter/splitter.py | 7 +++++++ 8 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 .idea/.gitignore create mode 100644 .idea/misc.xml create mode 100644 .idea/ml.iml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 research/text/splitter/splitter.md create mode 100644 research/text/splitter/splitter.py diff --git a/.gitignore b/.gitignore index b6e4761..4bc3716 100644 --- a/.gitignore +++ b/.gitignore @@ -69,7 +69,7 @@ instance/ .scrapy # Sphinx documentation -docs/_build/ +research/_build/ # PyBuilder target/ diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..73f69e0 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..2c99a69 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/ml.iml b/.idea/ml.iml new file mode 100644 index 0000000..9b6e332 --- /dev/null +++ b/.idea/ml.iml @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..e9898da --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ 
b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/research/text/splitter/splitter.md b/research/text/splitter/splitter.md new file mode 100644 index 0000000..2127323 --- /dev/null +++ b/research/text/splitter/splitter.md @@ -0,0 +1,19 @@ +# Text Splitter + +***Objectives*** +- [ ] Split the statement into an array of words +- [ ] Split the concatenated words into an array of separate words + +***Declaratives*** +- Statement I (split the concatenated words) +```text +myword and youknowaboutitaswell +``` +- Result as output +```text +my word and you know about it as well +``` +- Result as output in code +```text +[my, word, and, you, know, about, it, as, well] +``` \ No newline at end of file diff --git a/research/text/splitter/splitter.py b/research/text/splitter/splitter.py new file mode 100644 index 0000000..3799e8d --- /dev/null +++ b/research/text/splitter/splitter.py @@ -0,0 +1,7 @@ +import enchant + +locale = 'en_US' + +dict = enchant.Dict(locale) + +print(dict.check("Hello")) \ No newline at end of file