Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
da61704
Fix: move import to init.
SayaZhang Jan 22, 2024
b5e69b4
Add recursive character splitter
SayaZhang Jan 23, 2024
26255e7
Merge branch 'main' of https://github.com/CambioML/uniflow into main
SayaZhang Jan 24, 2024
f0fb952
Merge branch 'main' of https://github.com/CambioML/uniflow into main
SayaZhang Jan 25, 2024
af81106
Merge branch 'main' of https://github.com/CambioML/uniflow into main
SayaZhang Jan 27, 2024
b9bfad6
Update recursive splitter
SayaZhang Jan 27, 2024
61a4f3e
Merge branch 'main' of https://github.com/CambioML/uniflow into main
SayaZhang Feb 3, 2024
c3e5c15
Update html_op param
SayaZhang Feb 3, 2024
98e32f0
Update extract html example
SayaZhang Feb 3, 2024
54adc75
a function to read files from Amazon S3, URLs, or local paths
CluckRookie Feb 3, 2024
0b59d6f
Add @SayaZhang @CluckRookie @SeisSerenata @jojortz as repo codeowners…
goldmermaid Jan 31, 2024
3b5b17b
Add a new line for codeowners file
goldmermaid Jan 31, 2024
1faa714
Merge pull request #157 from CambioML/dev
Feb 3, 2024
c441d66
Merge branch 'main' into main
SayaZhang Feb 3, 2024
d8d4573
Merge pull request #141 from SayaZhang/main
SayaZhang Feb 3, 2024
c9e4389
a function to read files from Amazon S3, URLs, or local paths
CluckRookie Feb 3, 2024
8e31b55
Merge branch 'main' into main
CluckRookie Feb 3, 2024
7ff69fb
Merge pull request #162 from CluckRookie/main
CluckRookie Feb 3, 2024
3a10959
Merge branch 'main' into CambioML-main
LZDXN Feb 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# Default codeowners/reviewers for all code changes
* @CambioML @goldmermaid
* @CambioML @goldmermaid @SayaZhang @CluckRookie @SeisSerenata
70 changes: 37 additions & 33 deletions example/extract/extract_html.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,13 @@
{
"data": {
"text/plain": [
"{'extract': ['ExtractImageFlow',\n",
"{'extract': ['ExtractHTMLFlow',\n",
" 'ExtractImageFlow',\n",
" 'ExtractIpynbFlow',\n",
" 'ExtractMarkdownFlow',\n",
" 'ExtractPDFFlow',\n",
" 'ExtractTxtFlow',\n",
" 'ExtractS3TxtFlow',\n",
" 'ExtractHTMLFlow'],\n",
" 'ExtractS3TxtFlow'],\n",
" 'transform': ['TransformAzureOpenAIFlow',\n",
" 'TransformCopyFlow',\n",
" 'TransformHuggingFaceFlow',\n",
Expand Down Expand Up @@ -116,7 +116,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -132,7 +132,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -141,21 +141,14 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/1 [00:00<?, ?it/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1/1 [00:00<00:00, 4.53it/s]\n"
"100%|██████████| 1/1 [00:00<00:00, 10330.80it/s]\n"
]
}
],
Expand All @@ -174,40 +167,51 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['22.11. Information Theory — Dive into Deep Learning 1.0.3 documentation22.',\n",
"['22.11. Information Theory — Dive into Deep Learning 1.0.3 documentation',\n",
" 'Appendix: Mathematics for Deep Learning',\n",
" 'navigate_next',\n",
" 'Information Theory',\n",
" 'Quick search',\n",
" 'Show Source',\n",
" 'Preview Version',\n",
" 'Table Of Contents',\n",
" 'Installation',\n",
" '1. Introduction',\n",
" '2. Preliminaries',\n",
" '2.1. Data Manipulation',\n",
" '2.2. Data Preprocessing',\n",
" '2.3. Linear Algebra',\n",
" '2.4. Calculus',\n",
" '2.5. Automatic Differentiation',\n",
" '2.6. Probability and Statistics',\n",
" '3. Linear Neural Networks for Regression',\n",
" '3.1. Linear Regression',\n",
" '3.2. Object-Oriented Design for Implementation',\n",
" '3.3. Synthetic Regression Data',\n",
" '3.4. Linear Regression Implementation from Scratch',\n",
" '3.5. Concise Implementation of Linear Regression',\n",
" '4. Linear Neural Networks for Classification',\n",
" '4.1. Softmax Regression',\n",
" '4.2. The Image Classification Dataset',\n",
" '4.3. The Base Classification Model',\n",
" '4.4. Softmax Regression Implementation from Scratch',\n",
" '4.5. Concise Implementation of Softmax Regression',\n",
" '4.6. Generalization in Classification',\n",
" '4.7. Environment and Distribution Shift']\n"
" '2.7. Documentation']\n"
]
}
],
"source": [
"text = output[0]['output'][0]['text'][0]\n",
"text = [p for p in text.split(\"\\n\") if len(p) > 20]\n",
"pprint.pprint(text[:20])"
"text = output[0]['output'][0]['text'][0:30]\n",
"text = [p for p in text if len(p) > 10]\n",
"pprint.pprint(text)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## End of the notebook\n",
"\n",
"Check more Uniflow use cases in the [example folder](https://github.com/CambioML/uniflow/tree/main/example/model#examples)!\n",
"\n",
"<a href=\"https://www.cambioml.com/\" title=\"Title\">\n",
" <img src=\"../image/cambioml_logo_large.png\" style=\"height: 100px; display: block; margin-left: auto; margin-right: auto;\"/>\n",
"</a>"
]
}
],
Expand Down
Loading