ReUseX
0.0.5
3D Point Cloud Processing for Building Reuse
Toggle main menu visibility
Loading...
Searching...
No Matches
IDataset.hpp
Go to the documentation of this file.
1
// SPDX-FileCopyrightText: 2025 Povl Filip Sonne-Frederiksen
2
//
3
// SPDX-License-Identifier: GPL-3.0-or-later
4
#pragma once
5
#include "reusex/vision/IData.hpp"
6
7
#include <opencv2/core/mat.hpp>
8
9
#include <filesystem>
10
#include <memory>
11
#include <span>
12
#include <vector>
13
14
// Forward declaration of reusex::ProjectDB (defined in ProjectDB.hpp).
// This header refers to ProjectDB only through std::shared_ptr<ProjectDB>,
// so the complete type is not needed here and the include can stay out of
// the header.
namespace reusex {
class ProjectDB;
} // namespace reusex
18
19
namespace
reusex::vision
{
20
/* Interface for datasets. A dataset is a collection of data samples, where each
21
* sample consists of an image and a label. The dataset is stored in a SQLite
22
* database, where each sample is stored as a row in a table. The table has the
23
* following columns: - id: an integer primary key that uniquely identifies the
24
* sample - image: a blob that contains the image data - label: an integer that
25
* represents the label of the sample. The dataset provides methods for
26
* retrieving samples and saving new samples to the database. The get method
27
* retrieves a sample by its index, and the save method saves a batch of samples
28
* to the database. The dataset also provides methods for retrieving and saving
29
* images, which are used internally by the get and save methods. The dataset is
30
* designed to be used with the IData interface, which represents a single data
31
* sample. The IData interface provides methods for accessing the image and
32
* label of a sample, and for saving the sample to the database. The dataset is
33
* intended to be used in machine learning applications, where it can be used to
34
* train and evaluate models on a collection of labeled images. */
35
class
IDataset
{
36
public
:
37
/* A pair of a data sample and its index. The data sample is represented as a
38
* unique pointer to an IData object, and the index is a size_t that
39
* represents the position of the sample in the dataset. The get method
40
* returns a Pair, which allows the caller to access both the data sample
41
* and its index. The save method takes a span of Pairs, which allows the
42
* caller to save a batch of samples to the database. */
43
using
Pair
= std::pair<std::unique_ptr<IData>,
size_t
>;
44
45
/* Constructs a new IDataset object with a shared database instance.
46
*
47
* This constructor allows multiple IDataset instances to share the same
48
* database connection. The database is managed by shared_ptr, so it will
49
* remain open as long as any IDataset instance references it.
50
*
51
* @param database Shared pointer to ProjectDB instance
52
*/
53
explicit
IDataset
(std::shared_ptr<ProjectDB>
database
);
54
55
/* Constructs a new IDataset object by opening a database at the given path.
56
*
57
* This convenience constructor creates a new ProjectDB instance internally
58
* and stores it as a shared_ptr. The database connection is managed by the
59
* IDataset and will be closed when the last reference is destroyed.
60
*
61
* @param dbPath The path to the ReUseX project database file.
62
*/
63
explicit
IDataset
(std::filesystem::path dbPath);
64
65
/* Virtual destructor to ensure proper cleanup of derived classes. */
66
virtual
~IDataset
() =
default
;
67
68
/* Returns the number of samples in the dataset. The size method returns the
69
* number of samples in the dataset, which is equal to the size of the ids_
70
* vector. The size method is used by the caller to determine how many samples
71
* are available in the dataset, and to iterate over the samples using their
72
* indices. The size method is a const method, which means that it does not
73
* modify the state of the IDataset object.
74
* @return The number of samples in the dataset.
75
*/
76
size_t
size
()
const
;
77
86
size_t
filter_annotated
();
87
88
/* Retrieves a sample by its index. The get method takes an index as input,
89
* which is used to look up the corresponding sample ID in the ids_ vector.
90
* The get method then retrieves the image and label for the sample from the
91
* database, and returns a Pair containing a unique pointer to an IData object
92
* that represents the sample, and the index of the sample in the dataset. The
93
* get method is a const method, which means that it does not modify the state
94
* of the IDataset object. The get method is a pure virtual method, which
95
* means that it must be implemented by derived classes.
96
* @param index The index of the sample to retrieve.
97
* @return A Pair containing a unique pointer to an IData object that
98
* represents the sample, and the index of the sample in the dataset.
99
*/
100
virtual
Pair
get
(
const
std::size_t index)
const
= 0;
101
102
/* Saves a batch of samples to the database. The save method takes a span of
103
* Pairs as input, which allows the caller to save a batch of samples to the
104
* database. The save method iterates over the span of Pairs, and for each
105
* Pair, it retrieves the IData object and its index, and saves the image and
106
* label for the sample to the database. The save method returns true if all
107
* samples were saved successfully, and false otherwise. The save method is a
108
* pure virtual method, which means that it must be implemented by derived
109
* classes.
110
* @param data A span of Pairs, where each Pair contains a unique pointer to
111
* an IData object that represents a sample, and the index of the sample in
112
* the dataset.
113
* @return true if all samples were saved successfully, and false otherwise.
114
*/
115
virtual
bool
save
(
const
std::span<Pair> &data) = 0;
116
117
protected
:
118
/* Retrieves the image data for a sample from the database. The getImage
119
* method takes an index as input, which is used to look up the corresponding
120
* sample ID in the ids_ vector. The getImage method then retrieves the image
121
* data for the sample from the database, and returns it as a cv::Mat object.
122
* The getImage method is a const method, which means that it does not modify
123
* the state of the IDataset object. The getImage method is used internally by
124
* the get method to retrieve the image data for a sample when constructing an
125
* IData object to represent the sample.
126
* @param index The index of the sample whose image data to retrieve.
127
* @return A cv::Mat object containing the image data for the sample.
128
*/
129
cv::Mat
image
(
const
std::size_t index)
const
;
130
131
/* Saves the image data for a sample to the database. The saveImage method
132
* takes an index and a cv::Mat object as input, which represent the index of
133
* the sample and the image data to save, respectively. The saveImage method
134
* saves the image data for the sample to the database, and returns true if
135
* the image was saved successfully, and false otherwise. The saveImage method
136
* is used internally by the save method to save the image data for a sample
137
* when saving a batch of samples to the database.
138
* @param index The index of the sample whose image data to save.
139
* @param image A cv::Mat object containing the image data to save for the
140
* sample.
141
* @return true if the image was saved successfully, and false otherwise.
142
*/
143
bool
save_image
(
const
std::size_t index,
const
cv::Mat &
image
);
144
145
/* Access to the underlying database for subclasses.
146
*
147
* Subclasses can use this to access database functionality beyond the
148
* basic getImage/saveImage interface if needed.
149
*
150
* @return Shared pointer to the ProjectDB instance
151
*/
152
std::shared_ptr<ProjectDB>
database
()
const
;
153
154
private
:
155
/* Shared pointer to the project database. Multiple IDataset instances can
156
* share the same database connection. The database connection is managed
157
* via RAII and will be closed when the last reference is destroyed.
158
*/
159
std::shared_ptr<ProjectDB> db_;
160
161
/* Cached list of node IDs in the dataset. This is populated once during
162
* construction by querying the database. The IDs are used to map from
163
* dataset indices (0, 1, 2, ...) to RTABMap node IDs.
164
*/
165
std::vector<int> ids_;
166
};
167
}
// namespace reusex::vision
reusex::ProjectDB
Definition
ProjectDB.hpp:31
reusex::vision::IDataset::IDataset
IDataset(std::filesystem::path dbPath)
reusex::vision::IDataset::~IDataset
virtual ~IDataset()=default
reusex::vision::IDataset::get
virtual Pair get(const std::size_t index) const =0
reusex::vision::IDataset::save
virtual bool save(const std::span< Pair > &data)=0
reusex::vision::IDataset::IDataset
IDataset(std::shared_ptr< ProjectDB > database)
reusex::vision::IDataset::size
size_t size() const
reusex::vision::IDataset::filter_annotated
size_t filter_annotated()
Remove already-annotated frames from the dataset.
reusex::vision::IDataset::Pair
std::pair< std::unique_ptr< IData >, size_t > Pair
Definition
IDataset.hpp:43
reusex::vision::IDataset::image
cv::Mat image(const std::size_t index) const
reusex::vision::IDataset::database
std::shared_ptr< ProjectDB > database() const
reusex::vision::IDataset::save_image
bool save_image(const std::size_t index, const cv::Mat &image)
reusex::vision
Definition
annotate.hpp:8
reusex
Definition
filter_expression.hpp:12
libs
reusex
include
vision
IDataset.hpp
Generated by
1.17.0