Skip to content

Commit 2ede6c7

Browse files
committed
Breaking: add options, bump version to v0.2.0
1 parent 2bd187c commit 2ede6c7

File tree

8 files changed

+510
-89
lines changed

8 files changed

+510
-89
lines changed

.github/workflows/release.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ jobs:
2323
- { target: x86_64-apple-darwin , os: macos-13 }
2424
- { target: x86_64-unknown-linux-gnu , os: ubuntu-22.04 }
2525
- { target: x86_64-unknown-linux-musl , os: ubuntu-22.04 , use-cross: true }
26-
- { target: x86_64-pc-windows-gnu , os: windows-2019 }
27-
- { target: x86_64-pc-windows-msvc , os: windows-2019 }
26+
- { target: x86_64-pc-windows-gnu , os: windows-2025 }
27+
- { target: x86_64-pc-windows-msvc , os: windows-2025 }
2828

2929
steps:
3030
- name: Checkout source code

README.md

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ Add `html2text` to your list of dependencies in `mix.exs`:
1414
```elixir
1515
def deps do
1616
[
17-
{:html2text, "~> 0.1"}
17+
{:html2text, "~> 0.2"}
1818
]
1919
end
2020
```
@@ -27,28 +27,18 @@ mix deps.get
2727

2828
## Usage
2929

30-
The library provides a single main function `HTML2Text.convert/2` that takes HTML content and a width parameter.
31-
3230
```elixir
3331
# Convert with specific line width
3432
html = "<h1>Welcome</h1><p>This is a sample paragraph with some content.</p>"
35-
text = HTML2Text.convert(html, 30)
33+
text = HTML2Text.convert!(html, width: 30)
3634
IO.puts(text)
35+
3736
# Output:
3837
# # Welcome
3938
#
4039
# This is a sample paragraph
4140
# with some content.
4241

43-
44-
# Convert with unlimited width
45-
text = HTML2Text.convert(html, :infinity)
46-
IO.puts(text)
47-
# Output:
48-
# # Welcome
49-
#
50-
# This is a sample paragraph with some content.
51-
5242
html = """
5343
<article>
5444
<h1>Article Title</h1>
@@ -92,7 +82,7 @@ html = """
9282
</article>
9383
"""
9484

95-
text = HTML2Text.convert(html, 70)
85+
text = HTML2Text.convert!(html)
9686
IO.puts(text)
9787

9888
# Output:
@@ -120,8 +110,8 @@ IO.puts(text)
120110
#
121111
# ## Conclusion
122112
#
123-
# This article provided an overview of important web technologies and
124-
# some key statistics.
113+
# This article provided an overview of important web technologies and some key
114+
# statistics.
125115
#
126116
# [1]: http://example.com
127117
```

lib/html2_text.ex

Lines changed: 49 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -39,61 +39,64 @@ defmodule HTML2Text do
3939
version: version,
4040
force_build: System.get_env("HTML2TEXT_BUILD") in ["1", "true"]
4141

42-
@doc """
43-
Converts HTML content to plain text with configurable line width.
44-
45-
This function converts HTML content to plain text and optionally wraps lines at the
46-
specified width. The width can be either a positive integer representing the maximum
47-
number of characters per line, or `:infinity` for unlimited line width.
42+
@type opts :: [
43+
width: pos_integer() | :infinity,
44+
decorate: boolean(),
45+
link_footnotes: boolean(),
46+
table_borders: boolean(),
47+
pad_block_width: boolean(),
48+
allow_width_overflow: boolean(),
49+
min_wrap_width: pos_integer(),
50+
raw: boolean(),
51+
wrap_links: boolean(),
52+
unicode_strikeout: boolean()
53+
]
4854

49-
## Parameters
55+
@doc """
56+
Converts HTML content to plain text, returning `{:ok, text}` on success or `{:error, reason}` if the conversion fails.
57+
58+
## Options
59+
- `:width` — Maximum line width (positive integer or `:infinity`). Defaults to `80`. Setting it to `:infinity` disables line wrapping, so each block (for example, a paragraph) is rendered on a single line.
60+
- `:decorate` — Enables text decorations like bold or italic. Boolean, defaults to `true`. When `false`, output is plain text without styling.
61+
- `:link_footnotes` — Adds numbered link footnotes at the end of the text. Boolean, defaults to `true`. When `false`, links are omitted.
62+
- `:table_borders` — Shows ASCII borders around table cells. Boolean, defaults to `true`. When `false`, tables render without borders.
63+
- `:pad_block_width` — Pads blocks with spaces to align text to full width. Boolean, defaults to `false`. Useful for fixed-width layouts.
64+
- `:allow_width_overflow` — Allows lines to exceed the specified width when wrapping is impossible. Boolean, defaults to `false`. When `true`, conversion no longer fails if the content can't fit within the width.
65+
- `:min_wrap_width` — Minimum length of text chunks when wrapping lines. Integer ≥ 1, defaults to `3`. Helps avoid awkwardly narrow wraps.
66+
- `:raw` — Enables raw mode with minimal processing and formatting. Boolean, defaults to `false`. Produces plain, raw text output.
67+
- `:wrap_links` — Wraps long URLs or links onto multiple lines. Boolean, defaults to `true`. When `false`, links stay on a single line and may overflow.
68+
- `:unicode_strikeout` — Uses Unicode characters for strikeout text. Boolean, defaults to `true`. When `false`, strikeout renders in simpler styles.
5069
51-
- `html` - A binary containing the HTML content to convert
52-
- `width` - Either a positive integer for line width or `:infinity` for unlimited width
70+
## Examples
5371
54-
## Return Value
72+
iex> html = "<h1>Title</h1><p>Some paragraph text.</p>"
73+
...> HTML2Text.convert(html, width: 15)
74+
{:ok, "# Title\\n\\nSome paragraph\\ntext.\\n"}
5575
56-
Returns a string containing the plain text representation of the HTML content.
76+
iex> HTML2Text.convert("<b>Important</b>", decorate: false)
77+
{:ok, "Important\\n"}
5778
58-
## Examples
59-
60-
# Converting with specific width
61-
iex> html = "<h1>Welcome to Our Amazing Website</h1><p>This is a comprehensive guide that covers everything you need to know about our services and products.</p>"
62-
iex> HTML2Text.convert(html, 30)
63-
"# Welcome to Our Amazing\\n# Website\\n\\nThis is a comprehensive guide\\nthat covers everything you\\nneed to know about our\\nservices and products.\\n"
64-
65-
# Converting with unlimited width
66-
iex> html = "<div><strong>Important:</strong> Please read all the terms and conditions carefully before proceeding with your purchase.</div>"
67-
iex> HTML2Text.convert(html, :infinity)
68-
"**Important:** Please read all the terms and conditions carefully before proceeding with your purchase.\\n"
69-
70-
# Converting lists and complex HTML
71-
iex> html = "<ul><li>First item with some detailed description</li><li>Second item that also has quite a bit of text</li><li>Third item</li></ul>"
72-
iex> HTML2Text.convert(html, 25)
73-
"* First item with some\\n detailed description\\n* Second item that also\\n has quite a bit of text\\n* Third item\\n"
74-
75-
# Converting tables and structured content
76-
iex> html = "<table><tr><td>Product Name</td><td>Description</td><td>Price</td></tr><tr><td>Widget</td><td>A useful widget for everyday tasks</td><td>$19.99</td></tr></table>"
77-
iex> HTML2Text.convert(html, 50)
78-
\"""
79-
───────────┬────────────────────────────────┬─────
80-
Product │Description │Price
81-
Name │ │
82-
───────────┼────────────────────────────────┼─────
83-
Widget │A useful widget for everyday │$19.9
84-
│tasks │9
85-
───────────┴────────────────────────────────┴─────
86-
\"""
79+
iex> HTML2Text.convert("<table><tr><td>A</td><td>B</td></tr></table>", [])
80+
{:ok, "─┬─\\nA│B\\n─┴─\\n"}
8781
8882
"""
89-
@spec convert(String.t(), pos_integer() | :infinity) :: String.t()
90-
def convert(html, :infinity) when is_binary(html) do
91-
do_convert(html, :infinity)
83+
@spec convert(html :: String.t(), opts()) :: {:ok, text :: String.t()} | {:error, reason :: String.t()}
84+
def convert(html, opts \\ []) do
85+
do_convert(html, opts)
9286
end
9387

94-
def convert(html, width) when is_binary(html) and is_integer(width) and width > 0 do
95-
do_convert(html, width)
88+
@doc """
89+
Converts HTML content to plain text, raising on failure.
90+
91+
This function behaves like `convert/2`, but returns the converted text directly instead of an `{:ok, text}` tuple and raises a `RuntimeError` if conversion fails.
92+
"""
93+
@spec convert!(html :: String.t(), opts :: opts()) :: String.t()
94+
def convert!(html, opts \\ []) do
95+
case do_convert(html, opts) do
96+
{:ok, text} -> text
97+
{:error, reason} -> raise "HTML to text conversion failed: #{reason}"
98+
end
9699
end
97100

98-
defp do_convert(_html, _width), do: :erlang.nif_error(:nif_not_loaded)
101+
defp do_convert(_html, _opts), do: :erlang.nif_error(:nif_not_loaded)
99102
end

mix.exs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
defmodule HTML2Text.MixProject do
22
use Mix.Project
3-
@version "0.1.1"
3+
@version "0.2.0"
44
@source_url "https://github.com/fuelen/html2text"
55

66
def project do

native/html2text_nif/Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

native/html2text_nif/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "html2text_nif"
3-
version = "0.1.1"
3+
version = "0.2.0"
44
authors = []
55
edition = "2021"
66

native/html2text_nif/src/lib.rs

Lines changed: 86 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,92 @@
1-
use rustler::{NifResult, Env, Atom, Term};
2-
use std::io::Cursor;
3-
use html2text::from_read;
1+
use rustler::{Env, NifResult, Term, Atom};
42

53
#[rustler::nif(schedule = "DirtyCpu")]
6-
fn do_convert(env: Env, html: String, width_term: Term) -> NifResult<String> {
7-
let cursor = Cursor::new(html.as_bytes());
8-
9-
// Determine width - parameters are already validated by Elixir guard
10-
let actual_width = if let Ok(atom) = width_term.decode::<Atom>() {
11-
let infinity_atom = Atom::from_str(env, "infinity")?;
12-
if atom == infinity_atom {
13-
usize::MAX
14-
} else {
15-
unreachable!() // guard should filter this out
4+
fn do_convert(env: Env, html: String, config_term: Term) -> NifResult<(Atom, String)> {
5+
let kv_vec: Vec<(Atom, Term)> = config_term.decode()?;
6+
7+
let mut width = 80usize;
8+
let mut decorate = true;
9+
let mut link_footnotes = true;
10+
let mut table_borders = true;
11+
let mut pad_block_width = false;
12+
let mut allow_width_overflow = false;
13+
let mut min_wrap_width = 3usize;
14+
let mut raw = false;
15+
let mut wrap_links = true;
16+
let mut unicode_strikeout = true;
17+
18+
let key_width = Atom::from_str(env, "width")?;
19+
let key_decorate = Atom::from_str(env, "decorate")?;
20+
let key_link_footnotes = Atom::from_str(env, "link_footnotes")?;
21+
let key_table_borders = Atom::from_str(env, "table_borders")?;
22+
let key_pad_block_width = Atom::from_str(env, "pad_block_width")?;
23+
let key_allow_width_overflow = Atom::from_str(env, "allow_width_overflow")?;
24+
let key_min_wrap_width = Atom::from_str(env, "min_wrap_width")?;
25+
let key_raw = Atom::from_str(env, "raw")?;
26+
let key_wrap_links = Atom::from_str(env, "wrap_links")?;
27+
let key_unicode_strikeout = Atom::from_str(env, "unicode_strikeout")?;
28+
29+
let infinity_atom = Atom::from_str(env, "infinity")?;
30+
31+
for (key, val) in kv_vec {
32+
if key == key_width {
33+
if let Ok(atom) = val.decode::<Atom>() {
34+
if atom == infinity_atom {
35+
width = usize::MAX;
36+
}
37+
} else if let Ok(w) = val.decode::<usize>() {
38+
width = w;
39+
}
40+
} else if key == key_decorate {
41+
decorate = val.decode::<bool>().unwrap_or(false);
42+
} else if key == key_link_footnotes {
43+
link_footnotes = val.decode::<bool>().unwrap_or(false);
44+
} else if key == key_table_borders {
45+
table_borders = val.decode::<bool>().unwrap_or(true);
46+
} else if key == key_pad_block_width {
47+
pad_block_width = val.decode::<bool>().unwrap_or(false);
48+
} else if key == key_allow_width_overflow {
49+
allow_width_overflow = val.decode::<bool>().unwrap_or(false);
50+
} else if key == key_min_wrap_width {
51+
min_wrap_width = val.decode::<usize>().unwrap_or(3);
52+
} else if key == key_raw {
53+
raw = val.decode::<bool>().unwrap_or(false);
54+
} else if key == key_wrap_links {
55+
wrap_links = val.decode::<bool>().unwrap_or(true);
56+
} else if key == key_unicode_strikeout {
57+
unicode_strikeout = val.decode::<bool>().unwrap_or(true);
1658
}
17-
} else if let Ok(width) = width_term.decode::<usize>() {
18-
width
19-
} else {
20-
unreachable!() // guard should filter this out
21-
};
22-
23-
match from_read(cursor, actual_width) {
24-
Ok(text) => Ok(text),
25-
Err(e) => Err(rustler::Error::Term(Box::new(e.to_string()))),
59+
}
60+
61+
let mut config = html2text::config::plain_no_decorate().max_wrap_width(width);
62+
63+
if decorate {
64+
config = config.do_decorate();
65+
}
66+
config = config.link_footnotes(link_footnotes);
67+
68+
if !table_borders {
69+
config = config.no_table_borders();
70+
}
71+
if pad_block_width {
72+
config = config.pad_block_width();
73+
}
74+
if allow_width_overflow {
75+
config = config.allow_width_overflow();
76+
}
77+
config = config.min_wrap_width(min_wrap_width);
78+
79+
if raw {
80+
config = config.raw_mode(true);
81+
}
82+
if !wrap_links {
83+
config = config.no_link_wrapping();
84+
}
85+
config = config.unicode_strikeout(unicode_strikeout);
86+
87+
match config.string_from_read(html.as_bytes(), width) {
88+
Ok(text) => Ok((Atom::from_str(env, "ok")?, text)),
89+
Err(e) => Ok((Atom::from_str(env, "error")?, e.to_string())),
2690
}
2791
}
2892

0 commit comments

Comments
 (0)