-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathsparsevec.hpp
More file actions
106 lines (91 loc) · 2.97 KB
/
sparsevec.hpp
File metadata and controls
106 lines (91 loc) · 2.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
/*
* pgvector-cpp v0.3.0
* https://github.com/pgvector/pgvector-cpp
* MIT License
*/
#pragma once
#include <algorithm>
#include <cstddef>
#include <limits>
#include <ostream>
#include <span>
#include <stdexcept>
#include <unordered_map>
#include <vector>
namespace pgvector {
/// A sparse vector.
///
/// Stores the total number of dimensions together with two parallel arrays:
/// the zero-based positions of the non-zero elements (in ascending order) and
/// the element values found at those positions.
class SparseVector {
  public:
    /// Creates a sparse vector from a dense vector.
    ///
    /// Delegates to the span constructor, so the same limits and errors apply.
    explicit SparseVector(const std::vector<float>& value) : SparseVector(std::span<const float>{value}) {}

    /// Creates a sparse vector from a span.
    ///
    /// Elements that compare equal to zero are skipped; every other element is
    /// recorded with its zero-based position.
    ///
    /// @throws std::invalid_argument if the span has more elements than fit in an int.
    explicit SparseVector(std::span<const float> value) {
        const std::size_t length = value.size();
        if (length > static_cast<std::size_t>(std::numeric_limits<int>::max())) {
            throw std::invalid_argument{"sparsevec cannot have more than max int dimensions"};
        }
        dimensions_ = static_cast<int>(length);

        // no capacity is reserved up front: most elements are expected to be zero
        int position = 0;  // cannot overflow, length was checked against max int above
        for (const float element : value) {
            if (element != 0) {
                indices_.push_back(position);
                values_.push_back(element);
            }
            ++position;
        }
    }

    /// Creates a sparse vector from a map of non-zero elements.
    ///
    /// Keys are zero-based indices and must lie in [0, dimensions). Entries whose
    /// value compares equal to zero are dropped. The stored indices are sorted in
    /// ascending order and the values follow the same order.
    ///
    /// @throws std::invalid_argument if dimensions is negative or a key is out of bounds.
    SparseVector(const std::unordered_map<int, float>& map, int dimensions) {
        if (dimensions < 0) {
            throw std::invalid_argument{"sparsevec cannot have negative dimensions"};
        }
        dimensions_ = dimensions;

        // first pass: validate every key and keep the ones with a non-zero value
        for (const auto& entry : map) {
            const int index = entry.first;
            const bool in_bounds = index >= 0 && index < dimensions;
            if (!in_bounds) {
                throw std::invalid_argument{"sparsevec index out of bounds"};
            }
            if (entry.second != 0) {
                indices_.push_back(index);
            }
        }

        // the map iterates in unspecified order, so sort before pairing up values
        std::sort(indices_.begin(), indices_.end());

        // second pass: look the values up in sorted index order
        values_.reserve(indices_.size());
        for (const int index : indices_) {
            // every index was taken from the map above, so the lookup always succeeds
            values_.push_back(map.find(index)->second);
        }
    }

    /// Returns the number of dimensions.
    int dimensions() const {
        return dimensions_;
    }

    /// Returns the non-zero indices.
    const std::vector<int>& indices() const {
        return indices_;
    }

    /// Returns the non-zero values.
    const std::vector<float>& values() const {
        return values_;
    }

    /// Returns true when both vectors have the same dimensions, indices, and values.
    friend bool operator==(const SparseVector& lhs, const SparseVector& rhs) {
        if (lhs.dimensions_ != rhs.dimensions_) {
            return false;
        }
        if (lhs.indices_ != rhs.indices_) {
            return false;
        }
        return lhs.values_ == rhs.values_;
    }

    /// Writes the vector as `{index:value,...}/dimensions`.
    ///
    /// Indices are stored zero-based but written one-based.
    friend std::ostream& operator<<(std::ostream& os, const SparseVector& value) {
        const std::size_t count = value.indices_.size();
        os << "{";
        for (std::size_t pos = 0; pos < count; ++pos) {
            if (pos != 0) {
                os << ",";
            }
            os << (value.indices_[pos] + 1) << ":" << value.values_[pos];
        }
        return os << "}/" << value.dimensions_;
    }

  private:
    int dimensions_;
    std::vector<int> indices_;
    std::vector<float> values_;
};
} // namespace pgvector