Compare commits

...

5 Commits

Author SHA1 Message Date
Erik Johnston
dcd574b89c Stuff 2023-01-03 09:42:37 +00:00
Erik Johnston
c03785e121 Implement {get,pop}_node 2022-12-24 13:44:17 +00:00
Erik Johnston
b9cdf3d85e String cache 2022-12-24 12:36:54 +00:00
Erik Johnston
18ac015ecd bindings 2022-12-05 14:03:28 +00:00
Erik Johnston
4874d6320a Add tree cache 2022-12-05 09:57:38 +00:00
5 changed files with 751 additions and 0 deletions

View File

@@ -3,3 +3,7 @@
[workspace]
members = ["rust"]
[profile.dbgrelease]
inherits = "release"
debug = true

View File

@@ -0,0 +1,77 @@
// Copyright 2022 The Matrix.org Foundation C.I.C.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#![feature(test)]
use synapse::tree_cache::TreeCache;
use test::Bencher;
extern crate test;
#[bench]
fn bench_tree_cache_get_non_empty(b: &mut Bencher) {
let mut cache: TreeCache<&str, &str> = TreeCache::new();
cache.set(["a", "b", "c", "d"], "f").unwrap();
b.iter(|| cache.get(&["a", "b", "c", "d"]));
}
#[bench]
fn bench_tree_cache_get_empty(b: &mut Bencher) {
let cache: TreeCache<&str, &str> = TreeCache::new();
b.iter(|| cache.get(&["a", "b", "c", "d"]));
}
#[bench]
fn bench_tree_cache_set(b: &mut Bencher) {
let mut cache: TreeCache<&str, &str> = TreeCache::new();
b.iter(|| cache.set(["a", "b", "c", "d"], "f").unwrap());
}
#[bench]
fn bench_tree_cache_length(b: &mut Bencher) {
let mut cache: TreeCache<u32, u32> = TreeCache::new();
for c1 in 0..=10 {
for c2 in 0..=10 {
for c3 in 0..=10 {
for c4 in 0..=10 {
cache.set([c1, c2, c3, c4], 1).unwrap()
}
}
}
}
b.iter(|| cache.len());
}
#[bench]
fn tree_cache_iterate(b: &mut Bencher) {
let mut cache: TreeCache<u32, u32> = TreeCache::new();
for c1 in 0..=10 {
for c2 in 0..=10 {
for c3 in 0..=10 {
for c4 in 0..=10 {
cache.set([c1, c2, c3, c4], 1).unwrap()
}
}
}
}
b.iter(|| cache.items().count());
}

View File

@@ -1,6 +1,7 @@
use pyo3::prelude::*;
pub mod push;
pub mod tree_cache;
/// Returns the hash of all the rust source files at the time it was compiled.
///
@@ -26,6 +27,7 @@ fn synapse_rust(py: Python<'_>, m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(get_rust_file_digest, m)?)?;
push::register_module(py, m)?;
tree_cache::binding::register_module(py, m)?;
Ok(())
}

View File

@@ -0,0 +1,247 @@
use std::hash::Hash;
use anyhow::Error;
use pyo3::{
pyclass, pymethods,
types::{PyModule, PyTuple},
IntoPy, PyAny, PyObject, PyResult, Python, ToPyObject,
};
use super::TreeCache;
pub fn register_module(py: Python<'_>, m: &PyModule) -> PyResult<()> {
let child_module = PyModule::new(py, "tree_cache")?;
child_module.add_class::<PythonTreeCache>()?;
child_module.add_class::<StringTreeCache>()?;
m.add_submodule(child_module)?;
// We need to manually add the module to sys.modules to make `from
// synapse.synapse_rust import push` work.
py.import("sys")?
.getattr("modules")?
.set_item("synapse.synapse_rust.tree_cache", child_module)?;
Ok(())
}
#[derive(Clone)]
struct HashablePyObject {
obj: PyObject,
hash: isize,
}
impl HashablePyObject {
pub fn new(obj: &PyAny) -> Result<Self, Error> {
let hash = obj.hash()?;
Ok(HashablePyObject {
obj: obj.to_object(obj.py()),
hash,
})
}
}
impl IntoPy<PyObject> for HashablePyObject {
fn into_py(self, _: Python<'_>) -> PyObject {
self.obj.clone()
}
}
impl IntoPy<PyObject> for &HashablePyObject {
fn into_py(self, _: Python<'_>) -> PyObject {
self.obj.clone()
}
}
impl ToPyObject for HashablePyObject {
fn to_object(&self, _py: Python<'_>) -> PyObject {
self.obj.clone()
}
}
impl Hash for HashablePyObject {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.hash.hash(state);
}
}
impl PartialEq for HashablePyObject {
fn eq(&self, other: &Self) -> bool {
let equal = Python::with_gil(|py| {
let result = self.obj.as_ref(py).eq(other.obj.as_ref(py));
result.unwrap_or(false)
});
equal
}
}
impl Eq for HashablePyObject {}
#[pyclass]
struct PythonTreeCache(TreeCache<HashablePyObject, PyObject>);
#[pymethods]
impl PythonTreeCache {
#[new]
fn new() -> Self {
PythonTreeCache(Default::default())
}
pub fn set(&mut self, key: &PyAny, value: PyObject) -> Result<(), Error> {
let v: Vec<HashablePyObject> = key
.iter()?
.map(|obj| HashablePyObject::new(obj?))
.collect::<Result<_, _>>()?;
self.0.set(v, value)?;
Ok(())
}
pub fn get_node<'a>(
&'a self,
py: Python<'a>,
key: &'a PyAny,
) -> Result<Option<Vec<(&'a PyTuple, &'a PyObject)>>, Error> {
let v: Vec<HashablePyObject> = key
.iter()?
.map(|obj| HashablePyObject::new(obj?))
.collect::<Result<_, _>>()?;
let Some(node) = self.0.get_node(v.clone())? else {
return Ok(None)
};
let items = node
.items()
.map(|(k, value)| {
let vec = v.iter().chain(k.iter().map(|a| *a)).collect::<Vec<_>>();
let nk = PyTuple::new(py, vec);
(nk, value)
})
.collect::<Vec<_>>();
Ok(Some(items))
}
pub fn get(&self, key: &PyAny) -> Result<Option<&PyObject>, Error> {
let v: Vec<HashablePyObject> = key
.iter()?
.map(|obj| HashablePyObject::new(obj?))
.collect::<Result<_, _>>()?;
Ok(self.0.get(&v)?)
}
pub fn pop_node<'a>(
&'a mut self,
py: Python<'a>,
key: &'a PyAny,
) -> Result<Option<Vec<(&'a PyTuple, PyObject)>>, Error> {
let v: Vec<HashablePyObject> = key
.iter()?
.map(|obj| HashablePyObject::new(obj?))
.collect::<Result<_, _>>()?;
let Some(node) = self.0.pop_node(v.clone())? else {
return Ok(None)
};
let items = node
.into_items()
.map(|(k, value)| {
let vec = v.iter().chain(k.iter()).collect::<Vec<_>>();
let nk = PyTuple::new(py, vec);
(nk, value)
})
.collect::<Vec<_>>();
Ok(Some(items))
}
pub fn pop(&mut self, key: &PyAny) -> Result<Option<PyObject>, Error> {
let v: Vec<HashablePyObject> = key
.iter()?
.map(|obj| HashablePyObject::new(obj?))
.collect::<Result<_, _>>()?;
Ok(self.0.pop(&v)?)
}
pub fn clear(&mut self) {
self.0.clear()
}
pub fn len(&self) -> usize {
self.0.len()
}
pub fn values(&self) -> Vec<&PyObject> {
self.0.values().collect()
}
pub fn items(&self) -> Vec<(Vec<&HashablePyObject>, &PyObject)> {
todo!()
}
}
#[pyclass]
struct StringTreeCache(TreeCache<String, String>);
#[pymethods]
impl StringTreeCache {
#[new]
fn new() -> Self {
StringTreeCache(Default::default())
}
pub fn set(&mut self, key: &PyAny, value: String) -> Result<(), Error> {
let key = key
.iter()?
.map(|o| o.expect("iter failed").extract().expect("not a string"));
self.0.set(key, value)?;
Ok(())
}
// pub fn get_node(&self, key: &PyAny) -> Result<Option<&TreeCacheNode<K, PyObject>>, Error> {
// todo!()
// }
pub fn get(&self, key: &PyAny) -> Result<Option<&String>, Error> {
let key = key.iter()?.map(|o| {
o.expect("iter failed")
.extract::<String>()
.expect("not a string")
});
Ok(self.0.get(key)?)
}
// pub fn pop_node(&mut self, key: &PyAny) -> Result<Option<TreeCacheNode<K, PyObject>>, Error> {
// todo!()
// }
pub fn pop(&mut self, key: Vec<String>) -> Result<Option<String>, Error> {
Ok(self.0.pop(&key)?)
}
pub fn clear(&mut self) {
self.0.clear()
}
pub fn len(&self) -> usize {
self.0.len()
}
pub fn values(&self) -> Vec<&String> {
self.0.values().collect()
}
pub fn items(&self) -> Vec<(Vec<&HashablePyObject>, &PyObject)> {
todo!()
}
}

421
rust/src/tree_cache/mod.rs Normal file
View File

@@ -0,0 +1,421 @@
use std::{borrow::Borrow, collections::HashMap, hash::Hash};
use anyhow::{bail, Error};
pub mod binding;
pub enum TreeCacheNode<K, V> {
Leaf(V),
Branch(usize, HashMap<K, TreeCacheNode<K, V>>),
}
impl<K, V> TreeCacheNode<K, V> {
pub fn new_branch() -> Self {
TreeCacheNode::Branch(0, Default::default())
}
fn len(&self) -> usize {
match self {
TreeCacheNode::Leaf(_) => 1,
TreeCacheNode::Branch(size, _) => *size,
}
}
}
impl<'a, K: Eq + Hash + 'a, V> TreeCacheNode<K, V> {
pub fn set(
&mut self,
mut key: impl Iterator<Item = K>,
value: V,
) -> Result<(usize, usize), Error> {
if let Some(k) = key.next() {
match self {
TreeCacheNode::Leaf(_) => bail!("Given key is too long"),
TreeCacheNode::Branch(size, map) => {
let node = map.entry(k).or_insert_with(TreeCacheNode::new_branch);
let (added, removed) = node.set(key, value)?;
*size += added;
*size -= removed;
Ok((added, removed))
}
}
} else {
let added = if let TreeCacheNode::Branch(_, map) = self {
(1, map.len())
} else {
(0, 0)
};
*self = TreeCacheNode::Leaf(value);
Ok(added)
}
}
pub fn pop<Q>(
&mut self,
current_key: Q,
mut next_keys: impl Iterator<Item = Q>,
) -> Result<Option<TreeCacheNode<K, V>>, Error>
where
Q: Borrow<K>,
Q: Hash + Eq + 'a,
{
if let Some(next_key) = next_keys.next() {
match self {
TreeCacheNode::Leaf(_) => bail!("Given key is too long"),
TreeCacheNode::Branch(size, map) => {
let node = if let Some(node) = map.get_mut(current_key.borrow()) {
node
} else {
return Ok(None);
};
if let Some(popped) = node.pop(next_key, next_keys)? {
*size -= node.len();
Ok(Some(popped))
} else {
Ok(None)
}
}
}
} else {
match self {
TreeCacheNode::Leaf(_) => bail!("Given key is too long"),
TreeCacheNode::Branch(size, map) => {
if let Some(node) = map.remove(current_key.borrow()) {
*size -= node.len();
Ok(Some(node))
} else {
Ok(None)
}
}
}
}
}
pub fn items(&'a self) -> impl Iterator<Item = (Vec<&K>, &V)> {
// To avoid a lot of mallocs we guess the length of the key. Ideally
// we'd know this.
let capacity_guesstimate = 10;
let mut stack = vec![(Vec::with_capacity(capacity_guesstimate), self)];
std::iter::from_fn(move || {
while let Some((prefix, node)) = stack.pop() {
match node {
TreeCacheNode::Leaf(value) => return Some((prefix, value)),
TreeCacheNode::Branch(_, map) => {
stack.extend(map.iter().map(|(k, v)| {
let mut new_prefix = Vec::with_capacity(capacity_guesstimate);
new_prefix.extend_from_slice(&prefix);
new_prefix.push(k);
(new_prefix, v)
}));
}
}
}
None
})
}
pub fn values(&'a self) -> impl Iterator<Item = &V> {
let mut stack = vec![self];
std::iter::from_fn(move || {
while let Some(node) = stack.pop() {
match node {
TreeCacheNode::Leaf(value) => return Some(value),
TreeCacheNode::Branch(_, map) => {
stack.extend(map.iter().map(|(_k, v)| v));
}
}
}
None
})
}
}
impl<'a, K: Clone + Eq + Hash + 'a, V> TreeCacheNode<K, V> {
pub fn into_items(self) -> impl Iterator<Item = (Vec<K>, V)> {
let mut stack = vec![(Vec::new(), self)];
std::iter::from_fn(move || {
while let Some((prefix, node)) = stack.pop() {
match node {
TreeCacheNode::Leaf(value) => return Some((prefix, value)),
TreeCacheNode::Branch(_, map) => {
stack.extend(map.into_iter().map(|(k, v)| {
let mut prefix = prefix.clone();
prefix.push(k);
(prefix, v)
}));
}
}
}
None
})
}
}
impl<K, V> Default for TreeCacheNode<K, V> {
fn default() -> Self {
TreeCacheNode::new_branch()
}
}
pub struct TreeCache<K, V> {
root: TreeCacheNode<K, V>,
}
impl<K, V> TreeCache<K, V> {
pub fn new() -> Self {
TreeCache {
root: TreeCacheNode::new_branch(),
}
}
}
impl<'a, K: Eq + Hash + 'a, V> TreeCache<K, V> {
pub fn set(&mut self, key: impl IntoIterator<Item = K>, value: V) -> Result<(), Error> {
self.root.set(key.into_iter(), value)?;
Ok(())
}
pub fn get_node<Q>(
&self,
key: impl IntoIterator<Item = Q>,
) -> Result<Option<&TreeCacheNode<K, V>>, Error>
where
Q: Borrow<K>,
Q: Hash + Eq + 'a,
{
let mut node = &self.root;
for k in key {
match node {
TreeCacheNode::Leaf(_) => bail!("Given key is too long"),
TreeCacheNode::Branch(_, map) => {
node = if let Some(node) = map.get(k.borrow()) {
node
} else {
return Ok(None);
};
}
}
}
Ok(Some(node))
}
pub fn get<Q>(&self, key: impl IntoIterator<Item = Q>) -> Result<Option<&V>, Error>
where
Q: Borrow<K>,
Q: Hash + Eq + 'a,
{
if let Some(node) = self.get_node(key)? {
match node {
TreeCacheNode::Leaf(value) => Ok(Some(value)),
TreeCacheNode::Branch(_, _) => bail!("Given key is too short"),
}
} else {
Ok(None)
}
}
pub fn pop_node<Q>(
&mut self,
key: impl IntoIterator<Item = Q>,
) -> Result<Option<TreeCacheNode<K, V>>, Error>
where
Q: Borrow<K>,
Q: Hash + Eq + 'a,
{
let mut key_iter = key.into_iter();
let k = if let Some(k) = key_iter.next() {
k
} else {
let node = std::mem::replace(&mut self.root, TreeCacheNode::new_branch());
return Ok(Some(node));
};
self.root.pop(k, key_iter)
}
pub fn pop(&mut self, key: &[K]) -> Result<Option<V>, Error> {
if let Some(node) = self.pop_node(key)? {
match node {
TreeCacheNode::Leaf(value) => Ok(Some(value)),
TreeCacheNode::Branch(_, _) => bail!("Given key is too short"),
}
} else {
Ok(None)
}
}
pub fn clear(&mut self) {
self.root = TreeCacheNode::new_branch();
}
pub fn len(&self) -> usize {
match self.root {
TreeCacheNode::Leaf(_) => 1,
TreeCacheNode::Branch(size, _) => size,
}
}
pub fn values(&self) -> impl Iterator<Item = &V> {
let mut stack = vec![&self.root];
std::iter::from_fn(move || {
while let Some(node) = stack.pop() {
match node {
TreeCacheNode::Leaf(value) => return Some(value),
TreeCacheNode::Branch(_, map) => {
stack.extend(map.values());
}
}
}
None
})
}
pub fn items(&self) -> impl Iterator<Item = (Vec<&K>, &V)> {
self.root.items()
}
}
impl<K, V> Default for TreeCache<K, V> {
fn default() -> Self {
TreeCache::new()
}
}
#[cfg(test)]
mod test {
use std::collections::BTreeSet;
use super::*;
#[test]
fn get_set() -> Result<(), Error> {
let mut cache = TreeCache::new();
cache.set(vec!["a", "b"], "c")?;
assert_eq!(cache.get(&["a", "b"])?, Some(&"c"));
let node = cache.get_node(&["a"])?.unwrap();
match node {
TreeCacheNode::Leaf(_) => bail!("expected branch"),
TreeCacheNode::Branch(_, map) => {
assert_eq!(map.len(), 1);
assert!(map.contains_key("b"));
}
}
Ok(())
}
#[test]
fn length() -> Result<(), Error> {
let mut cache = TreeCache::new();
cache.set(vec!["a", "b"], "c")?;
assert_eq!(cache.len(), 1);
cache.set(vec!["a", "b"], "d")?;
assert_eq!(cache.len(), 1);
cache.set(vec!["e", "f"], "g")?;
assert_eq!(cache.len(), 2);
cache.set(vec!["e", "h"], "i")?;
assert_eq!(cache.len(), 3);
cache.set(vec!["e"], "i")?;
assert_eq!(cache.len(), 2);
cache.pop_node(&["a"])?;
assert_eq!(cache.len(), 1);
Ok(())
}
#[test]
fn clear() -> Result<(), Error> {
let mut cache = TreeCache::new();
cache.set(vec!["a", "b"], "c")?;
assert_eq!(cache.len(), 1);
cache.clear();
assert_eq!(cache.len(), 0);
assert_eq!(cache.get(&["a", "b"])?, None);
Ok(())
}
#[test]
fn pop() -> Result<(), Error> {
let mut cache = TreeCache::new();
cache.set(vec!["a", "b"], "c")?;
assert_eq!(cache.pop(&["a", "b"])?, Some("c"));
assert_eq!(cache.pop(&["a", "b"])?, None);
Ok(())
}
#[test]
fn values() -> Result<(), Error> {
let mut cache = TreeCache::new();
cache.set(vec!["a", "b"], "c")?;
let expected = ["c"].iter().collect();
assert_eq!(cache.values().collect::<BTreeSet<_>>(), expected);
cache.set(vec!["d", "e"], "f")?;
let expected = ["c", "f"].iter().collect();
assert_eq!(cache.values().collect::<BTreeSet<_>>(), expected);
Ok(())
}
#[test]
fn items() -> Result<(), Error> {
let mut cache = TreeCache::new();
cache.set(vec!["a", "b"], "c")?;
cache.set(vec!["d", "e"], "f")?;
let expected = [(vec![&"a", &"b"], &"c"), (vec![&"d", &"e"], &"f")]
.into_iter()
.collect();
assert_eq!(cache.items().collect::<BTreeSet<_>>(), expected);
Ok(())
}
}