feat: text splitting and storage

This commit is contained in:
Per Stark
2024-11-20 12:10:23 +01:00
parent 8ba853a329
commit c3ccb8c034
6 changed files with 257 additions and 89 deletions

131
Cargo.lock generated
View File

@@ -261,7 +261,7 @@ checksum = "965c2d33e53cb6b267e148a4cb0760bc01f4904c1cd4bb4002a085bb016d1490"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
"synstructure",
]
@@ -273,7 +273,7 @@ checksum = "7b18050c2cd6fe86c3a76584ef5e0baf286d038cda203eb6223df2cc413565f7"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -391,7 +391,7 @@ dependencies = [
"proc-macro2",
"quote",
"strum",
"syn 2.0.77",
"syn 2.0.87",
"thiserror",
]
@@ -535,7 +535,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -552,7 +552,7 @@ checksum = "a27b8a3a6e1a44fa4c8baf1f653e4172e81486d4941f2237e20dc2d0cf4ddff1"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -572,6 +572,18 @@ version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
[[package]]
name = "auto_enums"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "459b77b7e855f875fd15f101064825cd79eb83185a961d66e6298560126facfb"
dependencies = [
"derive_utils",
"proc-macro2",
"quote",
"syn 2.0.87",
]
[[package]]
name = "autocfg"
version = "1.3.0"
@@ -643,7 +655,7 @@ checksum = "57d123550fa8d071b7255cb0cc04dc302baa6c8c4a79f55701552684d8399bce"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -675,7 +687,7 @@ dependencies = [
"heck 0.5.0",
"proc-macro-error",
"quote",
"syn 2.0.77",
"syn 2.0.87",
"ubyte",
]
@@ -870,7 +882,7 @@ dependencies = [
"proc-macro-crate",
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
"syn_derive",
]
@@ -1178,7 +1190,7 @@ dependencies = [
"proc-macro2",
"quote",
"strsim",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -1189,7 +1201,7 @@ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806"
dependencies = [
"darling_core",
"quote",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -1246,7 +1258,7 @@ checksum = "8034092389675178f570469e6c3b0465d3d30b4505c294a6550db47f3c17ad18"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -1277,7 +1289,7 @@ dependencies = [
"darling",
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -1287,7 +1299,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4abae7035bf79b9877b779505d8cf3749285b80c43941eda66604841889451dc"
dependencies = [
"derive_builder_core",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
name = "derive_utils"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65f152f4b8559c4da5d574bafc7af85454d706b4c5fe8b530d508cacbb6807ea"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.87",
]
[[package]]
@@ -1345,7 +1368,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -1643,7 +1666,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -1864,7 +1887,7 @@ dependencies = [
"markup5ever",
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -2350,7 +2373,7 @@ checksum = "49e7bc1560b95a3c4a25d03de42fe76ca718ab92d1a22a55b9b4cf67b3ae635c"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -2419,7 +2442,7 @@ dependencies = [
"cfg-if",
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -2616,9 +2639,9 @@ dependencies = [
[[package]]
name = "once_cell"
version = "1.19.0"
version = "1.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
[[package]]
name = "openssl-probe"
@@ -2824,7 +2847,7 @@ dependencies = [
"phf_shared 0.11.2",
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
"unicase",
]
@@ -2870,7 +2893,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -3278,7 +3301,7 @@ checksum = "bcc303e793d3734489387d205e9b186fac9c6cfacedd98cbb2e8a5943595f3e6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -3419,7 +3442,7 @@ checksum = "5f0ec466e5d8dca9965eb6871879677bef5590cf7525ad96cae14376efb75073"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -3828,7 +3851,7 @@ checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -3893,7 +3916,7 @@ dependencies = [
"darling",
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -4157,7 +4180,7 @@ dependencies = [
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -4312,9 +4335,9 @@ dependencies = [
[[package]]
name = "syn"
version = "2.0.77"
version = "2.0.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed"
checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d"
dependencies = [
"proc-macro2",
"quote",
@@ -4330,7 +4353,7 @@ dependencies = [
"proc-macro-error",
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -4356,7 +4379,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -4419,23 +4442,40 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76"
[[package]]
name = "thiserror"
version = "1.0.63"
name = "text-splitter"
version = "0.18.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724"
checksum = "189450e9eaff1a8037cca4d60ca62c134a7e601187430bdd487c86e25e8d6641"
dependencies = [
"ahash 0.8.11",
"auto_enums",
"either",
"itertools 0.13.0",
"once_cell",
"regex",
"strum",
"thiserror",
"unicode-segmentation",
]
[[package]]
name = "thiserror"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.63"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -4529,7 +4569,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -4649,7 +4689,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -4805,6 +4845,12 @@ dependencies = [
"unicode-script",
]
[[package]]
name = "unicode-segmentation"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
[[package]]
name = "unicode-width"
version = "0.1.14"
@@ -4923,7 +4969,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
"wasm-bindgen-shared",
]
@@ -4957,7 +5003,7 @@ checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@@ -5324,7 +5370,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.77",
"syn 2.0.87",
]
[[package]]
@@ -5351,6 +5397,7 @@ dependencies = [
"sha2",
"surrealdb",
"tempfile",
"text-splitter",
"thiserror",
"tokio",
"tracing",