fix(web-fetch): wire html2md feature dependency

This commit is contained in:
xj 2026-03-01 15:27:08 -08:00
parent 61ee2a4664
commit 93010bf75b
2 changed files with 184 additions and 20 deletions

199
Cargo.lock generated
View File

@ -427,6 +427,19 @@ version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
[[package]]
name = "auto_encoder"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f6364e11e0270035ec392151a54f1476e6b3612ef9f4fe09d35e72a8cebcb65"
dependencies = [
"chardetng",
"encoding_rs",
"percent-encoding",
"phf 0.11.3",
"phf_codegen 0.11.3",
]
[[package]]
name = "autocfg"
version = "1.5.0"
@ -768,7 +781,7 @@ dependencies = [
"cap-primitives",
"cap-std",
"io-lifetimes",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@ -797,7 +810,7 @@ dependencies = [
"maybe-owned",
"rustix 1.1.4",
"rustix-linux-procfs",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
"winx",
]
@ -917,6 +930,17 @@ dependencies = [
"zeroize",
]
[[package]]
name = "chardetng"
version = "0.1.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14b8f0b65b7b08ae3c8187e8d77174de20cb6777864c6b832d8ad365999cf1ea"
dependencies = [
"cfg-if",
"encoding_rs",
"memchr",
]
[[package]]
name = "chrono"
version = "0.4.44"
@ -1468,6 +1492,29 @@ dependencies = [
"typenum",
]
[[package]]
name = "cssparser"
version = "0.36.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dae61cf9c0abb83bd659dab65b7e4e38d8236824c85f0f804f173567bda257d2"
dependencies = [
"cssparser-macros",
"dtoa-short",
"itoa",
"phf 0.13.1",
"smallvec",
]
[[package]]
name = "cssparser-macros"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
dependencies = [
"quote",
"syn 2.0.117",
]
[[package]]
name = "csv"
version = "1.4.0"
@ -1845,6 +1892,21 @@ dependencies = [
"litrs",
]
[[package]]
name = "dtoa"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c3cf4824e2d5f025c7b531afcb2325364084a16806f6d47fbc1f5fbd9960590"
[[package]]
name = "dtoa-short"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87"
dependencies = [
"dtoa",
]
[[package]]
name = "dunce"
version = "1.0.5"
@ -2005,7 +2067,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
dependencies = [
"libc",
"windows-sys 0.52.0",
"windows-sys 0.61.2",
]
[[package]]
@ -2170,6 +2232,21 @@ dependencies = [
"webdriver",
]
[[package]]
name = "fast_html2md"
version = "0.0.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af3a0122fee1bcf6bb9f3d73782e911cce69d95b76a5e29e930af92cd4a8e4e3"
dependencies = [
"auto_encoder",
"futures-util",
"lazy_static",
"lol_html",
"percent-encoding",
"regex",
"url",
]
[[package]]
name = "fastrand"
version = "2.3.0"
@ -2184,7 +2261,7 @@ checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78"
dependencies = [
"cfg-if",
"rustix 1.1.4",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@ -2243,6 +2320,12 @@ version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
[[package]]
name = "foldhash"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb"
[[package]]
name = "form_urlencoded"
version = "1.2.2"
@ -2260,7 +2343,7 @@ checksum = "94e7099f6313ecacbe1256e8ff9d617b75d1bcb16a6fddef94866d225a01a14a"
dependencies = [
"io-lifetimes",
"rustix 1.1.4",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@ -2562,7 +2645,7 @@ version = "0.15.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
dependencies = [
"foldhash",
"foldhash 0.1.5",
"serde",
]
@ -2571,6 +2654,11 @@ name = "hashbrown"
version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
dependencies = [
"allocator-api2",
"equivalent",
"foldhash 0.2.0",
]
[[package]]
name = "hashify"
@ -3184,7 +3272,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2285ddfe3054097ef4b2fe909ef8c3bcd1ea52a8f0d274416caebeef39f04a65"
dependencies = [
"io-lifetimes",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@ -3471,6 +3559,25 @@ version = "0.4.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
[[package]]
name = "lol_html"
version = "2.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ff94cb6aef6ee52afd2c69331e9109906d855e82bd241f3110dfdf6185899ab"
dependencies = [
"bitflags 2.11.0",
"cfg-if",
"cssparser",
"encoding_rs",
"foldhash 0.2.0",
"hashbrown 0.16.1",
"memchr",
"mime",
"precomputed-hash",
"selectors",
"thiserror 2.0.18",
]
[[package]]
name = "lopdf"
version = "0.38.0"
@ -4592,6 +4699,7 @@ version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
dependencies = [
"phf_macros 0.11.3",
"phf_shared 0.11.3",
]
@ -4610,6 +4718,7 @@ version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf"
dependencies = [
"phf_macros 0.13.1",
"phf_shared 0.13.1",
"serde",
]
@ -4654,6 +4763,32 @@ dependencies = [
"phf_shared 0.13.1",
]
[[package]]
name = "phf_macros"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216"
dependencies = [
"phf_generator 0.11.3",
"phf_shared 0.11.3",
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "phf_macros"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef"
dependencies = [
"phf_generator 0.13.1",
"phf_shared 0.13.1",
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "phf_shared"
version = "0.11.3"
@ -5051,7 +5186,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7"
dependencies = [
"heck",
"itertools 0.10.5",
"itertools 0.14.0",
"log",
"multimap",
"petgraph",
@ -5068,7 +5203,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d"
dependencies = [
"anyhow",
"itertools 0.10.5",
"itertools 0.14.0",
"proc-macro2",
"quote",
"syn 2.0.117",
@ -5081,7 +5216,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b"
dependencies = [
"anyhow",
"itertools 0.10.5",
"itertools 0.14.0",
"proc-macro2",
"quote",
"syn 2.0.117",
@ -5246,7 +5381,7 @@ dependencies = [
"once_cell",
"socket2",
"tracing",
"windows-sys 0.52.0",
"windows-sys 0.60.2",
]
[[package]]
@ -5830,7 +5965,7 @@ dependencies = [
"errno",
"libc",
"linux-raw-sys 0.4.15",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@ -5843,7 +5978,7 @@ dependencies = [
"errno",
"libc",
"linux-raw-sys 0.12.1",
"windows-sys 0.52.0",
"windows-sys 0.61.2",
]
[[package]]
@ -6079,6 +6214,25 @@ dependencies = [
"libc",
]
[[package]]
name = "selectors"
version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "feef350c36147532e1b79ea5c1f3791373e61cbd9a6a2615413b3807bb164fb7"
dependencies = [
"bitflags 2.11.0",
"cssparser",
"derive_more 2.1.1",
"log",
"new_debug_unreachable",
"phf 0.13.1",
"phf_codegen 0.13.1",
"precomputed-hash",
"rustc-hash",
"servo_arc",
"smallvec",
]
[[package]]
name = "self_cell"
version = "1.2.2"
@ -6279,6 +6433,15 @@ dependencies = [
"winapi",
]
[[package]]
name = "servo_arc"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "170fb83ab34de17dc69aa7c67482b22218ddb85da56546f9bd6b929e32a05930"
dependencies = [
"stable_deref_trait",
]
[[package]]
name = "sha1"
version = "0.10.6"
@ -6425,7 +6588,6 @@ dependencies = [
"cfg-if",
"libc",
"psm",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
@ -6574,7 +6736,7 @@ dependencies = [
"fd-lock",
"io-lifetimes",
"rustix 0.38.44",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
"winx",
]
@ -6606,7 +6768,7 @@ dependencies = [
"getrandom 0.4.1",
"once_cell",
"rustix 1.1.4",
"windows-sys 0.52.0",
"windows-sys 0.61.2",
]
[[package]]
@ -8528,7 +8690,7 @@ version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [
"windows-sys 0.52.0",
"windows-sys 0.61.2",
]
[[package]]
@ -8812,7 +8974,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f3fd376f71958b862e7afb20cfe5a22830e1963462f3a17f49d82a6c1d1f42d"
dependencies = [
"bitflags 2.11.0",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@ -9075,6 +9237,7 @@ dependencies = [
"dialoguer",
"directories",
"fantoccini",
"fast_html2md",
"futures-util",
"glob",
"hex",

View File

@ -58,8 +58,9 @@ image = { version = "0.25", default-features = false, features = ["jpeg", "png"]
# URL encoding for web search
urlencoding = "2.1"
# HTML to plain text conversion (web_fetch tool)
# HTML to plain text / Markdown conversion (web_fetch tool)
nanohtml2text = "0.2"
# Optional dependency: the `fast_html2md` crate, renamed to `html2md` in this manifest.
# It is only built when a feature lists `dep:html2md` (the `web-fetch-html2md` feature does).
html2md = { package = "fast_html2md", version = "0.0.58", optional = true }
# Zip archive extraction
zip = { version = "8.1", default-features = false, features = ["deflate"] }
@ -240,7 +241,7 @@ whatsapp-web = ["dep:wa-rs", "dep:wa-rs-core", "dep:wa-rs-binary", "dep:wa-rs-pr
# Optional provider feature flags used by cfg(feature = "...") guards.
# Keep disabled by default to preserve current runtime behavior.
firecrawl = []
web-fetch-html2md = []
# `dep:html2md` activates the optional `html2md` dependency when this feature is enabled.
web-fetch-html2md = ["dep:html2md"]
[profile.release]
opt-level = "z" # Optimize for size