@conference {690113, title = {Cosine: A Cloud-Cost Optimized Self-Designing Key-Value Storage Engine}, booktitle = {Proceedings of the Very Large Databases Endowment (PVLDB)}, year = {2022}, abstract = { We present a self-designing key-value storage engine, Cosine, which can always take the shape of the close to {\textquotedblleft}perfect{\textquotedblright} engine architecture given an input workload, a cloud budget, a target performance, and required cloud SLAs. By identifying and formalizing the first principles of storage engine layouts and core key-value algorithms, Cosine constructs a massive design space comprising sextillion (10^36) possible storage engine designs over a diverse space of hardware and cloud pricing policies for three cloud providers {\textendash} AWS, GCP, and Azure. Cosine spans across diverse designs such as Log-Structured Merge-trees, B-trees, Log-Structured Hash-tables, in-memory accelerators for filters and indexes as well as trillions of hybrid designs that do not appear in the literature or industry but emerge as valid combinations of the above. Cosine includes a unified distribution-aware I/O model and a learned concurrency-aware CPU model that with high accuracy can calculate the performance and cloud cost of any possible design on any workload and virtual machines. Cosine can then search through that space in a matter of seconds to find the best design and materializes the actual code of the resulting storage engine design using a templated Rust implementation. We demonstrate that on average Cosine outperforms state-of-the-art storage engines such as write-optimized RocksDB, read-optimized WiredTiger, and very write-optimized FASTER by\ 53x,\ 25x, and\ 20x, respectively, for diverse workloads, data sizes, and cloud budgets across all YCSB core workloads and many variants. }, author = {Subarna Chatterjee and Meena Jagadeesan and Wilson Qin and Stratos Idreos} } @conference {690112, title = {Entropy-Learned Hashing: Constant Time Hashing with Controllable Uniformity}, booktitle = {ACM SIGMOD International Conference on Management of Data}, year = {2022}, abstract = { Hashing is a widely used technique for creating uniformly random numbers from arbitrary input data. It is a core component in relational data systems, key-value stores, compilers, networks, and many more areas, used for a wide range of operations including indexing, partitioning, filters, and sketches. Due to both the computational and data heavy nature of hashing in such operations, numerous recent studies observe that hashing emerges as a core bottleneck in modern systems. For example, a typical complex database query (TPC-H) could spend 50\% of its total cost in hash tables, while Google spends at least 2\% of its total computational cost across all systems on C++ hash tables, resulting in a massive yearly footprint coming from just a single operation. In this paper we propose a new method, called Entropy-Learned Hashing, which reduces the computational cost of hashing by up to an order of magnitude. The key question we ask is {\textquotedblleft}how much randomness is needed?{\textquotedblright}: We look at hashing from a pseudorandom point of view, wherein hashing is viewed as extracting randomness from a data source to create random outputs, and we show that state-of-the-art hash functions do too much work.
Entropy-Learned Hashing 1) models and estimates the randomness (entropy) of the input data, and then 2) creates data-specific hash functions that use only the parts of the data that are needed to differentiate the outputs. Thus the resulting hash functions can minimize the amount of computation needed while we prove that they act similarly to traditional hash functions in terms of the uniformity of their outputs. We test Entropy-Learned Hashing across diverse and core hashing operations such as hash tables, Bloom filters, and partitioning, and we observe an increase in throughput on the order of 3.7X, 4.0X, and 14X, respectively, compared to the best in-class hash functions and implementations used at scale by Google and Meta. }, author = {Brian Hentschel and Utku Sirin and Stratos Idreos} } @conference {672984, title = {More or Less: When and How to Build Convolutional Neural Network Ensembles}, booktitle = {International Conference on Learning Representations (ICLR)}, year = {2021}, abstract = {Convolutional neural networks are utilized to solve increasingly more complex problems and with more data. As a \ result, researchers and practitioners seek to scale the representational power of such models by adding more parameters.\ However, increasing parameters requires additional critical resources in terms of memory and compute, \ leading to increased training and inference cost. Thus a consistent challenge is to obtain the highest possible accuracy within a parameter budget.\ As neural network designers navigate this complex landscape, they are guided by conventional wisdom that is informed by past empirical studies.\ We identify a critical part of this design space that is not well-understood: How to decide between the alternatives of expanding a single convolutional network model or increasing the number of networks in the form of an ensemble.\ We study this question in detail across various network architectures and data sets. We build an extensive experimental framework that captures numerous angles of the possible design space in terms of how a new set of parameters can be used in a model. \ We consider a holistic set of metrics such as training time, inference time, and memory usage. The framework provides a robust assessment by making sure it controls for the number of parameters.\ Contrary to conventional wisdom, we show that when we perform a holistic and robust assessment, we uncover a wide design space, where ensembles provide better accuracy, train faster, and deploy at speed comparable to single convolutional networks with the same total number of parameters.\ }, author = {Wasay, Abdul and Stratos Idreos} } @article {668066, title = {Stacked Filters: Learning to Filter by Structure}, journal = {Proceedings of the VLDB Endowment}, volume = {14}, number = {4}, year = {2021}, pages = {600 - 612}, abstract = {We present Stacked Filters, a new probabilistic filter which is fast and robust similar to query-agnostic filters (such as Bloom and Cuckoo filters), and at the same time brings low false positive rates and sizes similar to classifier-based filters (such as Learned Filters). The core idea is that Stacked Filters incorporate workload knowledge about frequently queried non-existing values. Instead of learning, they structurally incorporate that knowledge using hashing and several sequenced filter layers, indexing both data and frequent negatives. Stacked Filters can also gather workload knowledge on-the-fly and adaptively build the filter.
We show experimentally that for a given memory budget, Stacked Filters achieve end-to-end query throughput up to 130x better than the best alternative for a workload, either query-agnostic or classifier-based filters, and depending on where data is (SSD or HDD).}, author = {Kyle Deeds and Brian Hentschel and Stratos Idreos} } @conference {651957, title = {Rosetta: A Robust Space-Time Optimized Range Filter for Key-Value Stores}, booktitle = {Proceedings of the ACM SIGMOD International Conference on Management of Data}, year = {2020}, abstract = {We introduce Rosetta, a probabilistic range filter designed specifically for LSM-tree based key-value stores. The core intuition is that we can sacrifice filter probe time because it is not visible in end-to-end key-value store performance, which in turn allows us to significantly reduce the filter false positive rate for every level of the tree. Rosetta indexes all binary prefixes of a key using a hierarchically arranged set of Bloom filters. It then converts each range query into multiple probes, one for each non-overlapping binary prefix. Rosetta has the ability to track workload patterns and adopt a beneficial tuning for each individual LSM-tree run by adjusting the number of Bloom filters it uses and how memory is spread among them to optimize the FPR/CPU cost balance. We show how to integrate Rosetta in a full system, RocksDB, and we demonstrate that it brings as much as a 40x improvement compared to default RocksDB and a 2-5x improvement compared to state-of-the-art range filters in a variety of workloads and across different levels of the memory hierarchy (memory, SSD, hard disk). We also show that, unlike state-of-the-art filters, Rosetta brings a net benefit in RocksDB{\textquoteright}s overall performance, i.e., it improves range queries without losing any performance for point queries.}, author = {Siqiang Luo and Subarna Chatterjee and Rafael Ketsetsidis and Niv Dayan and Wilson Qin and Stratos Idreos} } @conference {651343, title = {Key-Value Storage Engines}, booktitle = {ACM SIGMOD International Conference on Management of Data}, year = {2020}, abstract = {Key-value stores are everywhere. They power a diverse set of data-driven applications across both industry and science. Key-value stores are used as stand-alone NoSQL systems but they are also used as a part of more complex pipelines and systems such as machine learning and relational systems. In this tutorial, we survey the state-of-the-art approaches on how the core storage engine of a key-value store system is designed. We focus on several critical components of the engine, starting with the core data structures to lay out data across the memory hierarchy. We also discuss design issues related to caching, timestamps, concurrency control, updates, shifting workloads, as well as mixed workloads with both analytical and transactional characteristics. We cover designs that are read-optimized, write-optimized as well as hybrids. We draw examples from several state-of-the-art systems but we also put everything together in a general framework which allows us to model storage engine designs under a single unified model and reason about the expected behavior of diverse designs. In addition, we show that given the vast number of possible storage engine designs and their complexity, there is a need to be able to describe and communicate design decisions using a high-level descriptive language, and we present a first version of such a language.
We then use that framework to present several open challenges in the field, especially in terms of supporting increasingly more diverse and dynamic applications in the era of data science and AI, including neural networks, graphs, and data versioning.}, author = {Stratos Idreos and Mark Callaghan} } @conference {649403, title = {MotherNets: Rapid Deep Ensemble Learning}, booktitle = {Proceedings of the Conference on Machine Learning and Systems (MLSys)}, year = {2020}, abstract = {Ensembles of deep neural networks significantly improve generalization accuracy. However, training neural network ensembles requires a large amount of computational resources and time. State-of-the-art approaches either train all networks from scratch, leading to prohibitive training cost that allows only very small ensemble sizes in practice, or generate ensembles by training a monolithic architecture, which results in lower model diversity and decreased prediction accuracy. We propose MotherNets to enable higher accuracy and practical training cost for large and diverse neural network ensembles: A MotherNet captures the structural similarity across some or all members of a deep neural network ensemble which allows us to share data movement and computation costs across these networks. We first train a single or a small set of MotherNets and, subsequently, we generate the target ensemble networks by transferring the function from the trained MotherNet(s). Then, we continue to train these ensemble networks, which now converge drastically faster compared to training from scratch. MotherNets handle ensembles with diverse architectures by clustering ensemble networks of similar architecture and training a separate MotherNet for every cluster. MotherNets also use clustering to control the accuracy vs. training cost tradeoff. We show that compared to state-of-the-art approaches such as Snapshot Ensembles, Knowledge Distillation, and TreeNets, MotherNets provide a new Pareto frontier for the accuracy-training cost tradeoff. Crucially, training cost and accuracy improvements continue to scale as we increase the ensemble size (2 to 3 percent reduced absolute test error rate and up to 35 percent faster training compared to Snapshot Ensembles). We verify these benefits over numerous neural network architectures and large data sets.}, author = {Wasay, Abdul and Brian Hentschel and Yuze Liao and Sanyuan Chen and Stratos Idreos} } @article {643025, title = {Optimal Column Layout for Hybrid Workloads}, journal = {Proceedings of the Very Large Databases Endowment}, volume = {12}, number = {13}, year = {2019}, abstract = {Data-intensive analytical applications need to support both efficient reads and writes. However, what is usually a good data layout for an update-heavy workload is not well-suited for a read-mostly one and vice versa. Modern analytical data systems rely on columnar layouts and employ delta stores to inject new data and updates. We show that for hybrid workloads we can achieve close to one order of magnitude better performance by tailoring the column layout design to the data and query workload. Our approach navigates the possible design space of the physical layout: it organizes each column{\textquoteright}s data by determining the number of partitions, their corresponding sizes and ranges, and the amount of buffer space and how it is allocated.
We frame these design decisions as an optimization problem that, given workload knowledge and performance requirements, provides an optimal physical layout for the workload at hand. To evaluate this work, we build an in-memory storage engine, Casper, and we show that it outperforms state-of-the-art data layouts of analytical systems for hybrid workloads. Casper delivers up to 2.32{\texttimes}\ higher throughput for update-intensive workloads and up to 2.14{\texttimes}\ higher throughput for hybrid workloads. We further show how to make data layout decisions robust to workload variation by carefully selecting the input of the optimization.}, author = {Athanassoulis, Manos and Kenneth S. B{\o}gh and Stratos Idreos} } @article {637282, title = {Learning Data Structure Alchemy}, journal = {Bulletin of the IEEE Computer Society Technical Committee on Data Engineering}, volume = {42}, number = {2}, year = {2019}, pages = {46-57}, abstract = {We propose a solution based on first principles and AI to the decades-old problem of data structure design. Instead of working on individual designs that each can only be helpful in a small set of environments, we propose the construction of an engine, a Data Alchemist, which learns how to blend fine-grained data structure design principles to automatically synthesize brand new data structures.}, author = {Stratos Idreos and Kostas Zoumpatianos and Subarna Chatterjee and Wilson Qin and Wasay, Abdul and Brian Hentschel and Mike Kester and Niv Dayan and Demi Guo and Minseo Kang and Yiyou Sun} } @conference {635339, title = {The Log-Structured Merge-Bush \& the Wacky Continuum}, booktitle = {ACM SIGMOD International Conference on Management of Data}, year = {2019}, abstract = {Data-intensive key-value stores based on the Log-Structured Merge-Tree are used in numerous modern applications ranging from social media and data science to cloud infrastructure. We show that such designs exhibit an intrinsic contention between the costs of point reads, writes and memory, and that this trade-off deteriorates as the data size grows. The root of the problem is that in all existing designs, the capacity ratio between any pair of levels is fixed. This causes write cost to increase with the data size while yielding exponentially diminishing returns for point reads and memory. We introduce the Log-Structured Merge-Bush (LSM-Bush), a new data structure that sets increasing capacity ratios between adjacent pairs of smaller levels. As a result, smaller levels get lazier by gathering more runs before merging them. By using a doubly-exponential ratio growth rate, LSM-Bush brings write cost down from O(log N) to O(log log N), and it can trade this gain to either improve point reads or memory. Thus, it enables more scalable trade-offs all around. We further introduce Wacky, a design continuum that includes LSM-Bush as well as all state-of-the-art merge policies, from laziest to greediest, and can assume any of them within a single implementation.
Wacky encompasses a vast space of performance properties, including ones that favor range reads, and it can be searched analytically to find the design that performs best for a given workload in practice.}, author = {Niv Dayan and Stratos Idreos} } @conference {633702, title = {From Auto-tuning One Size Fits All to Self-designed and Learned Data-intensive Systems}, booktitle = {ACM SIGMOD International Conference on Management of Data}, year = {2019}, abstract = {We survey new opportunities to design data systems, data structures and algorithms that can adapt to both data and query workloads. Data keeps growing, hardware keeps changing and new applications appear ever more frequently. One size does not fit all, but data-intensive applications would like to balance and control memory requirements, read costs, write costs, as well as monetary costs on the cloud. This calls for tailored data systems, storage, and computation solutions that match the exact requirements of the scenario at hand. Such systems should be {\textquoteleft}{\textquoteleft}synthesized{\textquoteright}{\textquoteright} quickly and nearly automatically, removing the human system designers and administrators from the loop as much as possible to keep up with the quick evolution of applications and workloads. In addition, such systems should {\textquoteleft}{\textquoteleft}learn{\textquoteright}{\textquoteright} from both past and current system performance and workload patterns to keep adapting their design.\ We survey new trends in 1) self-designed, and 2) learned data systems and how these technologies can apply to relational, NoSQL, and big data systems as well as to broad data science applications. We focus on both recent research advances and practical applications of this technology, as well as numerous open research opportunities that come from their fusion. We specifically highlight recent work on data structures, algorithms, and query optimization, and how machine learning inspired designs as well as a detailed mapping of the possible design space of solutions can drive innovation to create tailored systems. We also position and connect with past seminal system designs and research in auto-tuning, modular/extensible, and adaptive data systems to highlight the new challenges as well as the opportunities to combine past and new technologies.\ }, author = {Stratos Idreos and Tim Kraska} } @conference {627879, title = {Design Continuums and the Path Toward Self-Designing Key-Value Stores that Know and Learn}, booktitle = {Biennial Conference on Innovative Data Systems Research (CIDR)}, year = {2019}, abstract = {We introduce the concept of design continuums for the data layout of key-value stores. A design continuum unifies major distinct data structure designs under the same model. The critical insight and potential long-term impact is that such unifying models 1)~render what we consider up to now as fundamentally different data structures to be seen as {\textquoteleft}{\textquoteleft}views{\textquoteright}{\textquoteright} of the very same overall design space, and 2)~allow {\textquoteleft}{\textquoteleft}seeing{\textquoteright}{\textquoteright} new data structure designs with performance properties that are not feasible by existing designs. 
The core intuition behind the construction of design continuums is that all data structures arise from the very same set of fundamental design principles, i.e., a small set of data layout design concepts out of which we can synthesize any design that exists in the literature as well as new ones. We show how to construct, evaluate, and expand design continuums, and we also present the first continuum that unifies major data structure designs, i.e., B+Tree, BeTree, LSM-tree, and LSH-Table. The practical benefit of a design continuum is that it creates a fast inference engine for the design of data structures. For example, we can near instantly predict how a specific design change in the underlying storage of a data system would affect performance, or reversely what would be the optimal data structure (from a given set of designs) given workload characteristics and a memory budget. In turn, these properties allow us to envision a new class of self-designing key-value stores with a substantially improved ability to adapt to workload and hardware changes by transitioning between drastically different data structure designs to assume a diverse set of performance properties at will.\ }, author = {Stratos Idreos and Niv Dayan and Wilson Qin and Mali Akmanalp and Sophie Hilgard and Andrew Ross and James Lennon and Varun Jain and Harshita Gupta and David Li and Zichen Zhu} } @article {622065, title = {Optimal Bloom Filters and Adaptive Merging for LSM-Trees}, journal = {ACM Transactions on Database Systems}, year = {2018}, abstract = {In this paper, we show that key-value stores backed by a log-structured merge-tree (LSM-tree) exhibit an intrinsic trade-off between lookup cost, update cost, and main memory footprint, yet all existing designs expose a suboptimal and difficult to tune trade-off among these metrics. We pinpoint the problem to the fact that modern key-value stores suboptimally co-tune the merge policy, the buffer size, and the Bloom filters{\textquoteright} false positive rates across the LSM-tree{\textquoteright}s different levels. We present Monkey, an LSM-tree based key-value store that strikes the optimal balance between the costs of updates and lookups with any given main memory budget. The core insight is that worst-case lookup cost is proportional to the sum of the false positive rates of the Bloom filters across all levels of the LSM-tree. Contrary to state-of-the-art key-value stores that assign a fixed number of bits-per-element to all Bloom filters, Monkey allocates memory to filters across different levels so as to minimize the sum of their false positive rates. We show analytically that Monkey reduces the asymptotic complexity of the worst-case lookup I/O cost, and we verify empirically using an implementation on top of RocksDB that Monkey reduces lookup latency by an increasing margin as the data volume grows (50\%\ -\ 80\%\ for the data sizes we experimented with). Furthermore, we map the design space onto a closed-form model that enables adapting the merging frequency and memory allocation to strike the best trade-off among lookup cost, update cost and main memory, depending on the workload (proportion of lookups and updates), the dataset (number and size of entries), and the underlying hardware (main memory available, disk vs. flash).
We show how to use this model to answer what-if design questions about how changes in environmental parameters impact performance and how to adapt the design of the key-value store for optimal performance.}, author = {Niv Dayan and Athanassoulis, Manos and Stratos Idreos} } @article {621226, title = {The Periodic Table of Data Structures}, journal = {Bulletin of the IEEE Computer Society Technical Committee on Data Engineering}, volume = {41}, number = {3}, year = {2018}, pages = {64-75}, abstract = {We describe the vision of being able to reason about the design space of data structures.\ We break this down into two questions: 1) Can we know all data structures that it is possible to design?\ \ 2) Can we compute the performance of arbitrary designs on a given hardware and workload\ without having to implement the design or even access the target hardware? If those challenges can be met, then an array of exciting opportunities would become feasible such as\ interactive what-if design to improve the productivity of data systems researchers and engineers, and\ informed decision making in industrial settings with regard to critical hardware/workload/data structure design issues.\ Then, even fully automated discovery of new data structure designs becomes possible.\ Furthermore, the structure of the design space itself provides numerous insights and opportunities such as the existence of design continuums\ that can lead to data systems with deep adaptivity, and a new understanding of the possible performance trade-offs.\ Given the universal presence of data structures at the very core of any data-driven field across all sciences and industries, reasoning about their design can have significant benefits, making it more feasible (easier, faster and cheaper) to adopt tailored state-of-the-art storage solutions. And this effect is going to become increasingly more critical as data keeps growing, hardware keeps changing and more applications/fields realize the transformative power and potential of data analytics. \ This paper presents this vision and surveys first steps that demonstrate its feasibility.\ }, author = {Stratos Idreos and Konstantinos Zoumpatianos and Athanassoulis, Manos and Niv Dayan and Brian Hentschel and Michael S. Kester and Demi Guo and Lukas Maas and Wilson Qin and Wasay, Abdul and Yiyou Sun} } @article {611070, title = {Smooth Scan: Robust Access Path Selection without Cardinality Estimation}, journal = {The International Journal on Very Large Databases (VLDBJ)}, year = {2018}, abstract = {Query optimizers depend heavily on statistics representing column distributions to create good query plans. In many cases, though, statistics are outdated or non-existent, and the process of refreshing statistics is very expensive, especially for ad-hoc workloads on ever bigger data. This results in suboptimal plans that severely hurt performance. The core of the problem is the fixed decision on the type of physical operators that comprise a query plan. This paper makes a case for continuous adaptation and morphing of physical operators throughout their lifetime, by adjusting their behavior in accordance with\ the observed statistical properties of the data at runtime. We demonstrate the benefits of the new paradigm by designing and implementing an adaptive access path operator called Smooth Scan, which morphs continuously within the space of index access and full table scan.
Smooth Scan behaves similarly to an index scan for low selectivity; if selectivity increases, however, Smooth Scan progressively morphs its behavior toward a sequential scan. As a result, a system with Smooth Scan requires no optimization decisions on the access paths up front. Additionally, by depending only on the result distribution and eschewing statistics and cardinality estimates altogether, Smooth Scan ensures repeatable execution across multiple query invocations. Smooth Scan implemented in PostgreSQL demonstrates robust, near-optimal performance on micro-benchmarks and real-life workloads, while being statistics-oblivious at the same time.}, author = {Renata Borovica and Stratos Idreos and Anastasia Ailamaki and Marcin Zukowski and Campbell Fraser} } @conference {608613, title = {Dostoevsky: Better Space-Time Trade-Offs for LSM-Tree Based Key-Value Stores via Adaptive Removal of Superfluous Merging}, booktitle = {ACM SIGMOD International Conference on Management of Data}, year = {2018}, abstract = {We show that all mainstream LSM-tree based key-value stores in the literature and in industry suboptimally trade between the I/O cost of updates on one hand and the I/O cost of lookups and storage space on the other. The reason is that they perform equally expensive merge operations across all levels of LSM-tree to bound the number of runs that a lookup has to probe and to remove obsolete entries to reclaim storage space. With state-of-the-art designs, however, merge operations from all levels of LSM-tree but the largest (i.e., most merge operations) reduce point lookup cost, long range lookup cost, and storage space by a negligible amount while significantly adding to the amortized cost of updates. To address this problem, we introduce Lazy Leveling, a new design that removes merge operations from all levels of LSM-tree but the largest. Lazy Leveling improves the worst-case complexity of update cost while maintaining the same bounds on point lookup cost, long range lookup cost, and storage space. We further introduce Fluid LSM-tree, a generalization of the entire LSM-tree design space that can be parameterized to assume any existing design. Relative to Lazy Leveling, Fluid LSM-tree can optimize more for updates by merging less at the largest level, or it can optimize more for short range lookups by merging more at all other levels. We put everything together to design Dostoevsky, a key-value store that adaptively removes superfluous merging by navigating the Fluid LSM-tree design space based on the application workload and hardware. We implemented Dostoevsky on top of RocksDB, and we show that it strictly dominates state-of-the-art designs in terms of performance and storage space.}, author = {Niv Dayan and Stratos Idreos} } @conference {608611, title = {Column Sketches: A Scan Accelerator for Rapid and Robust Predicate Evaluation}, booktitle = {ACM SIGMOD International Conference on Management of Data}, year = {2018}, abstract = {While numerous indexing and storage schemes have been developed to address the core functionality of predicate evaluation in data systems, they all require specific workload properties (query selectivity, data distribution, data clustering) to provide good performance and fail in other cases. We present a new class of indexing scheme, termed a\ Column Sketch, which improves the performance of predicate evaluation independently of workload properties.
Column Sketches work primarily through the use of lossy compression schemes which are designed so that the index ingests data quickly, evaluates any query performantly, and has a small memory footprint. A Column Sketch works by applying this lossy compression on a value-by-value basis, mapping base data to a representation of smaller fixed-width codes. Queries are evaluated affirmatively or negatively for the vast majority of values using the compressed data, and, only if needed, check the base data for the remaining values. Column Sketches work over column, row, and hybrid storage layouts. We demonstrate that by using a Column Sketch, the select operator in modern analytic systems attains better CPU efficiency and less data movement than state-of-the-art storage and indexing schemes. Compared to standard scans, Column Sketches provide an improvement of\ 3{\texttimes}-6{\texttimes}\ for numerical attributes and 2.7{\texttimes}\ for categorical attributes. Compared to state-of-the-art scan accelerators such as Column Imprints and BitWeaving, Column Sketches perform 1.4 - 4.8{\texttimes}\ better.}, author = {Brian Hentschel and Michael S. Kester and Stratos Idreos} } @conference {608610, title = {The Data Calculator: Data Structure Design and Cost Synthesis From First Principles, and Learned Cost Models}, booktitle = {ACM SIGMOD International Conference on Management of Data}, year = {2018}, abstract = {Data structures are critical in any data-driven scenario, but they are notoriously hard to design due to a massive design space and the dependence of performance on workload and hardware which evolve continuously. We present a design engine, the Data Calculator, which enables interactive and semi-automated design of data structures. It brings two innovations. First, it offers a set of fine-grained design primitives that capture the first principles of data layout design: how data structure nodes lay data out, and how they are positioned relative to each other. This allows for a structured description of the universe of possible data structure designs that can be synthesized as combinations of those primitives. The second innovation is computation of performance using learned cost models. These models are trained on diverse hardware and data profiles and capture the cost properties of fundamental data access primitives (e.g., random access). With these models, we synthesize the performance cost of complex operations on arbitrary data structure designs without having to: 1) implement the data structure, 2) run the workload, or even 3) access the target hardware. We demonstrate that the Data Calculator can assist data structure designers and researchers by accurately answering rich what-if design questions on the order of a few seconds or minutes, i.e., computing how the performance (response time) of a given data structure design is impacted by variations in the: 1) design, 2) hardware, 3) data, and 4) query workloads. This makes it effortless to test numerous designs and ideas before embarking on lengthy implementation, deployment, and hardware acquisition steps. We also demonstrate that the Data Calculator can synthesize entirely new designs, auto-complete partial designs, and detect suboptimal design choices.}, author = {Stratos Idreos and Konstantinos Zoumpatianos and Brian Hentschel and Michael S.
Kester and Demi Guo} } @conference {526101, title = {Data Canopy: Accelerating Exploratory Statistical Analysis}, booktitle = {ACM SIGMOD International Conference on Management of Data}, year = {2017}, abstract = { During exploratory statistical analysis, data scientists repeatedly compute statistics on data sets to infer knowledge. Moreover, statistics form the building blocks of core machine learning classification and filtering algorithms. Modern data systems, software libraries, and domain-specific tools provide support to compute statistics but lack a cohesive framework for storing, organizing, and reusing them. This creates a significant problem for exploratory statistical analysis as data grows: Despite existing overlap in exploratory workloads (which are repetitive in nature), statistics are always computed from scratch. This leads to repeated data movement and recomputation, hindering interactive data exploration. We address this challenge in Data Canopy, where descriptive and dependence statistics are synthesized from a library of basic aggregates. These basic aggregates are stored within an in-memory data structure, and are reused for overlapping data parts and for various statistical measures. What this means for exploratory statistical analysis is that repeated requests to compute different statistics do not trigger a full pass over the data. We discuss in detail the basic design elements in Data Canopy, which address multiple challenges: (1) How to decompose statistics into basic aggregates for maximal reuse? (2) How to represent, store, maintain, and access these basic aggregates? (3) Under different scenarios, which basic aggregates to maintain? (4) How to tune Data Canopy in a hardware conscious way for maximum performance and how to maintain good performance as data grows and memory pressure increases? We demonstrate experimentally that Data Canopy results in an average speed-up of at least 10{\texttimes} after just 100 exploratory queries when compared with state-of-the-art systems used for exploratory statistical analysis.\  \  }, author = {Wasay, Abdul and Xinding Wei and Niv Dayan and Stratos Idreos} } @conference {521291, title = {Monkey: Optimal Navigable Key-Value Store}, booktitle = {ACM SIGMOD International Conference on Management of Data}, year = {2017}, abstract = {\ In this paper, we show that key-value stores backed by an LSM-tree exhibit an intrinsic trade-off between lookup cost, update cost, and main memory footprint, yet all existing designs expose a suboptimal and difficult to tune trade-off among these metrics. We pinpoint the problem to the fact that all modern key-value stores suboptimally co-tune the merge policy, the buffer size, and the Bloom filters{\textquoteright} false positive rates in each level. We present Monkey, an LSM-based key-value store that strikes the optimal balance between the costs of updates and lookups with any given main memory budget. The insight is that worst-case lookup cost is proportional to the sum of the false positive rates of the Bloom filters across all levels of the LSM-tree. Contrary to state-of-the-art key-value stores that assign a fixed number of bits-per-element to all Bloom filters, Monkey allocates memory to filters across different levels so as to minimize this sum.
We show analytically that Monkey reduces the asymptotic complexity of the worst-case lookup I/O cost, and we verify empirically using an implementation on top of LevelDB that Monkey reduces lookup latency by an increasing margin as the data volume grows (50\% - 80\% for the data sizes we experimented with). Furthermore, we map the LSM-tree design space onto a closed-form model that enables co-tuning the merge policy, the buffer size and the filters{\textquoteright} false positive rates to trade among lookup cost, update cost and/or main memory, depending on the workload (proportion of lookups and updates), the dataset (number and size of entries), and the underlying hardware (main memory available, disk vs. flash). We show how to use this model to answer what-if design questions about how changes in environmental parameters impact performance and how to adapt the various LSM-tree design elements accordingly.\ \ }, author = {Niv Dayan and Athanassoulis, Manos and Stratos Idreos} } @conference {521286, title = {Access Path Selection in Main-Memory Optimized Data Systems: Should I Scan or Should I Probe?}, booktitle = {ACM SIGMOD International Conference on Management of Data}, year = {2017}, abstract = {The advent of columnar data analytics engines fueled a series of optimizations on the scan operator. New designs include column-group storage, vectorized execution, shared scans, working directly over compressed data, and operating using SIMD and multi-core execution. Larger main memories and deeper cache hierarchies increase the efficiency of modern scans, prompting a revisit of the question of access path selection. In this paper, we compare modern sequential scans and secondary index scans. Through detailed analytical modeling and experimentation we show that while scans have become useful in more cases than before, both access paths are still useful, and so, access path selection (APS) is still required to achieve the best performance when considering variable workloads. We show how to perform access path selection. In particular, contrary to the way traditional systems choose between scans and secondary indexes, we find that in addition to the query selectivity, the underlying hardware, and the system design, modern optimizers also need to take into account query concurrency. We further discuss the implications of integrating access path selection in a modern analytical data system. We demonstrate, both theoretically and experimentally, that using the proposed model a system can quickly perform access path selection, outperforming solutions that rely on a single access path or traditional access path models. We outline a light-weight mechanism to integrate APS into main-memory analytical systems that does not interfere with low latency queries. We also use the APS model to explain how the division between sequential scan and secondary index scan has historically changed due to hardware and workload changes, which allows for future projections based on hardware advancements.}, author = {Michael S. Kester and Athanassoulis, Manos and Stratos Idreos} } @conference {503601, title = {The Automatic Scientist will be a Data System}, booktitle = {Biennial Conference on Innovative Data Systems Research (CIDR)}, year = {2017}, abstract = {For thousands of years, science has happened in a rather manual way.
Mathematics, engineering and computer science provide the means to automate some of the laborious tasks that have to do with computation, data collection and management, and to some degree predictability. As scientific fields grow more mature, though, and scientists over-specialize, a new problem appears that has to do with the core of the scientific process rather than with the supporting steps.\ It becomes increasingly harder to be aware of all research concepts and techniques that may apply to a given problem.\ \ }, author = {Stratos Idreos} } @article {503596, title = {ADS: The Adaptive Data Series Index}, journal = {The Very Large Databases Journal (VLDBJ)}, volume = {25}, number = {6}, year = {2016}, pages = {843-866}, abstract = {Numerous applications continuously produce big amounts of data series, and in several time-critical scenarios analysts need to be able to query these data as soon as they become available. This, however, is not currently possible with the state-of-the-art indexing methods and for very large data series collections. In this paper, we present the first adaptive indexing mechanism, specifically tailored to solve the problem of indexing and querying very large data series collections. We present a detailed design and evaluation of our method using approximate and exact query algorithms with both synthetic and real datasets. Adaptive indexing significantly outperforms previous solutions, gracefully handling large data series collections, reducing the data-to-query delay: by the time state-of-the-art indexing techniques finish indexing 1 billion data series (and before answering even a single query), our method has already answered 3 * 10^5 queries.}, author = {Kostas Zoumpatianos and Stratos Idreos and Themis Palpanas} } @conference {503591, title = {Past and Future Steps for Adaptive Storage Data Systems: From Shallow to Deep Adaptivity}, booktitle = {International Workshop on Enabling Real-Time Business Intelligence (BIRTE 2016)}, year = {2016}, abstract = {\ Data systems with adaptive storage can autonomously change their behavior by altering how data is stored and accessed. Such systems have been studied primarily for the case of adaptive indexing to automatically create the right indexes at the right granularity. More recently, work on adaptive loading and adaptive data layouts brought even more flexibility. We survey this work and describe the need for even deeper adaptivity that goes beyond adjusting knobs in a single architecture; instead it can adapt the fundamental architecture of a data system to drastically alter its behavior.\ \ }, author = {Stratos Idreos and Athanassoulis, Manos and Niv Dayan and Demi Guo and Michael S. Kester and Lukas M. Maas and Kostas Zoumpatianos} } @conference {395126, title = {GeckoFTL: Scalable Flash Translation Techniques For Very Large Flash Devices}, booktitle = {ACM SIGMOD International Conference on Management of Data}, year = {2016}, abstract = { The volume of metadata needed by a flash translation layer (FTL) is proportional to the storage capacity of a flash device. Ideally, this metadata should reside in the device{\textquoteright}s integrated RAM to enable fast access. However, as flash devices scale to terabytes, the necessary volume of metadata is exceeding the available integrated RAM. Moreover, recovery time after power failure, which is proportional to the size of the metadata, is becoming impractical. The simplest solution is to persist more metadata in flash.
The problem is that updating metadata in flash increases the number of internal IOs, thereby harming performance and device lifetime. In this paper, we identify a key component of the metadata called the Page Validity Bitmap (PVB) as the bottleneck. PVB is used by the garbage-collectors of state-of-the-art FTLs to keep track of which physical pages in the device are invalid. PVB constitutes 95\% of the FTL{\textquoteright}s RAM-resident metadata, and recovering PVB after power fails takes a significant proportion of the overall recovery time. To solve this problem, we propose a page-associative FTL called GeckoFTL, whose central innovation is replacing PVB with a new data structure called Logarithmic Gecko. Logarithmic Gecko is similar to an LSM-tree in that it first logs updates and later reorganizes them to ensure fast and scalable access time. Relative to the baseline of storing PVB in flash, Logarithmic Gecko enables cheaper updates at the cost of slightly more expensive garbage-collection queries. We show that this is a good trade-off because (1) updates are intrinsically more frequent than garbage-collection queries to page validity metadata, and (2) flash writes are more expensive than flash reads. We demonstrate analytically and empirically through simulation that GeckoFTL achieves a 95\% reduction in space requirements and at least a 51\% reduction in recovery time by storing page validity metadata in flash while keeping the contribution to internal IO overheads 98\% lower than the baseline.\  }, author = {Niv Dayan and Philippe Bonnet and Stratos Idreos} } @conference {388556, title = {Adaptive Data Skipping in Main-Memory Systems}, booktitle = {ACM SIGMOD International Conference on Management of Data}, year = {2016}, abstract = {As modern main-memory optimized data systems increasingly rely on fast scans, lightweight indexes that allow for data skipping play a crucial role in data filtering to reduce system I/O. Scans benefit from data skipping when the data order is sorted, semi-sorted, or comprised of clustered values. However, data skipping loses effectiveness over arbitrary data distributions. Applying data skipping techniques over non-sorted data can significantly decrease query performance since the extra cost of metadata reads results in no corresponding scan performance gains. We introduce adaptive data skipping as a framework for structures and techniques that respond to a vast array of data distributions and query workloads. We reveal an adaptive zonemaps design and implementation on a main-memory column store prototype to demonstrate that adaptive data skipping has the potential for a 1.4X speedup.}, author = {Wilson Qin and Stratos Idreos} } @conference {388586, title = {Design Tradeoffs of Data Access Methods}, booktitle = {ACM SIGMOD International Conference on Management of Data}, year = {2016}, abstract = {Database researchers and practitioners have been building methods to store, access, and update data for more than five decades. Designing access methods has been a constant effort to adapt to the ever changing underlying hardware and workload requirements. The recent explosion in data system designs {\textendash} including, in addition to traditional SQL systems, NoSQL, NewSQL, and other relational and non-relational systems {\textendash} makes understanding the tradeoffs of designing access methods more important than ever. Access methods are at the core of any new data system.
In this tutorial, we survey recent developments in access method design and we place them in the design space where each approach focuses primarily on one or a subset of read performance, update performance, and memory utilization. We discuss how to utilize designs and lessons learned from past research. In addition, we discuss new ideas on how to build access methods that have tunable behavior, as well as what the landscape of open research problems looks like.}, author = {Athanassoulis, Manos and Stratos Idreos} } @conference {388571, title = {Main Memory Adaptive Denormalization}, booktitle = {ACM SIGMOD International Conference on Management of Data}, year = {2016}, abstract = {Joins have traditionally been the most expensive database operator, but they are required to query normalized schemas. In turn, normalized schemas are necessary to minimize update costs and space usage. Joins can be avoided altogether by using a denormalized schema instead of a normalized schema; this improves analytical query processing times at the tradeoff of increased update overhead, loading cost, and storage requirements. In our work, we show that we can achieve the best of both worlds by leveraging partial, incremental, and dynamic denormalized tables to avoid join operators, resulting in fast query performance while retaining the minimized loading, update, and storage costs of a normalized schema. We introduce adaptive denormalization for modern main memory systems. We replace the traditional join operations with efficient scans over the relevant partial universal tables without incurring the prohibitive costs of full denormalization.}, author = {Zezhou Liu and Stratos Idreos} } @conference {388581, title = {UpBit: Scalable In-Memory Updatable Bitmap Indexing}, booktitle = {ACM SIGMOD International Conference on Management of Data}, year = {2016}, abstract = {Bitmap indexes are widely used in both scientific and commercial databases. They bring fast read performance for specific types of queries, such as equality and selective range queries. A major drawback of bitmap indexes, however, is that supporting updates is particularly costly. Bitmap indexes are kept compressed to minimize storage footprint; as a result, updating a bitmap index requires the expensive step of decoding and then encoding a bitvector. Today, more and more applications need support for both reads and writes, blurring the boundaries between analytical processing and transaction processing. This requires new system designs and access methods that support general updates and, at the same time, offer competitive read performance. In this paper, we propose scalable in-memory Updatable Bitmap indexing (UpBit), which offers efficient updates, without hurting read performance. UpBit relies on two design points. First, in addition to the main bitvector for each domain value, UpBit maintains an update bitvector, to keep track of updated values. Effectively, every update can now be directed to a highly-compressible, easy-to-update bitvector. While update bitvectors double the amount of uncompressed data, they are sparse, and as a result their compressed size is small. Second, we introduce fence pointers in all update bitvectors which allow for efficient retrieval of a value at an arbitrary position. Using both synthetic and real-life data, we demonstrate that UpBit significantly outperforms state-of-the-art bitmap indexes for workloads that contain both reads and writes.
In particular, compared to update-optimized bitmap index designs, UpBit is 15 - 29{\texttimes} faster in terms of update time and 2.7{\texttimes} faster in terms of read performance. In addition, compared to read-optimized bitmap index designs, UpBit achieves efficient and scalable updates (51 - 115{\texttimes} lower update latency), while allowing for comparable read performance, having up to 8\% overhead.}, author = {Athanassoulis, Manos and Zheng Yan and Stratos Idreos} } @conference {372506, title = {Adaptive Indexing over Encrypted Numeric Data}, booktitle = {ACM SIGMOD International Conference on Management of Data}, year = {2016}, address = {San Francisco}, abstract = { Today, outsourcing query processing tasks to remote cloud servers becomes a viable option; such outsourcing calls for encrypting data stored at the server so as to render it secure against eavesdropping adversaries and/or an honest-but-curious server itself. At the same time, to be efficiently managed, outsourced data should be indexed, and even adaptively so, as a side-effect of query processing. Computationally heavy encryption schemes render such outsourcing unattractive; an alternative, Order-Preserving Encryption Scheme (OPES), intentionally preserves and reveals the order in the data, hence is unattractive from the security viewpoint. In this paper, we propose and analyze a scheme for lightweight and indexable encryption, based on linear-algebra operations. Our scheme provides higher security than OPES and allows for range and point queries to be efficiently evaluated over encrypted numeric data, with decryption performed at the client side. We implement a prototype that performs incremental, query-triggered adaptive indexing over encrypted numeric data based on this scheme, without leaking order information in advance, and without prohibitive overhead, as our extensive experimental study demonstrates.\  }, author = {Panagiotis Karras and Artyom Nikitin and Muhammad Saad and Rudrika Bhatt and Denis Antyukhov and Stratos Idreos} } @conference {372516, title = {Designing Access Methods: The RUM Conjecture}, booktitle = {International Conference on Extending Database Technology (EDBT)}, year = {2016}, address = {Bordeaux, France}, abstract = { The database research community has been building methods to store, access, and update data for more than four decades. Throughout the evolution of the structures and techniques used to access data, access methods adapt to the ever changing hardware and workload requirements. Today, even small changes in the workload or the hardware lead to a redesign of access methods. The need for new designs has been increasing as data generation and workload diversification grow exponentially, and hardware advances introduce increased complexity. New workload requirements are introduced by the emergence of new applications, and data is managed by large systems composed of more and more complex and heterogeneous hardware. As a result, it is increasingly important to develop application-aware and hardware-aware access methods. The fundamental challenges that every researcher, systems architect, or designer faces when designing a new access method are how to minimize i) read times (R), ii) update cost (U), and iii) memory (or storage) overhead (M). In this paper, we conjecture that when optimizing the read-update-memory overheads, optimizing in any two areas negatively impacts the third. We present a simple model of the RUM overheads, and we articulate the RUM Conjecture.
We show how the RUM Conjecture manifests in state-of-the-art access methods, and we envision a trend toward RUM-aware access methods for future data systems.\  }, author = {Athanassoulis, Manos and Michael Kester and Lukas Maas and Stoica, Radu and Stratos Idreos and Anastasia Ailamaki and Mark Callaghan} } @article {299631, title = {RINSE: Interactive Data Series Exploration with ADS+}, journal = {Proceedings of the Very Large Databases Endowment (PVLDB)}, volume = {8}, number = {12}, year = {2015}, abstract = {Numerous applications continuously produce big amounts of data series, and in several time-critical scenarios analysts need to be able to query these data as soon as they become available. An adaptive index data structure, ADS+, which is specifically tailored to solve the problem of indexing and querying very large data series collections, has been recently proposed as a solution to this problem. The main idea is that instead of building the complete index over the complete data set up-front and querying only later, we interactively and adaptively build parts of the index, only for the parts of the data on which the users pose queries. The net effect is that instead of waiting for extended periods of time for the index creation, users can immediately start exploring the data series. In this work, we present a demonstration of ADS+; we introduce RINSE, a system that allows users to experience the benefits of the ADS+ adaptive index through an intuitive web interface. Users can explore large datasets and find patterns of interest, using nearest neighbor search. They can draw queries (data series) using a mouse, or touch screen, or they can select from a predefined list of data series. RINSE can scale to large data sizes, while drastically reducing the data-to-query delay: by the time state-of-the-art indexing techniques finish indexing 1 billion data series (and before answering even a single query), adaptive data series indexing can already answer 300K queries.}, author = {Konstantinos Zoumpatianos and Stratos Idreos and Themis Palpanas} } @conference {254956, title = {Beyond the Wall: Near-Data Processing for Databases}, booktitle = {Proceedings of the 11th International Workshop on Data Management on New Hardware (DaMoN)}, year = {2015}, address = {Melbourne, Australia}, abstract = { The continuous growth of main memory size allows modern data systems to process entire large scale datasets in memory. The increase in memory capacity, however, is not matched by a proportional decrease in memory latency, causing a mismatch for in-memory processing. As a result, data movement through the memory hierarchy is now one of the main performance bottlenecks for main memory data systems. Database systems researchers have proposed several innovative solutions to minimize data movement and to make data access patterns hardware-aware. Nevertheless, all relevant rows and columns for a given query have to be moved through the memory hierarchy; hence, movement of large data sets is on the critical path. In this paper, we present JAFAR, a Near-Data Processing (NDP) accelerator for pushing selects down to memory in modern column-stores. JAFAR implements the select operator and allows only qualifying data to travel up the memory hierarchy. Through a detailed simulation of JAFAR hardware we show that it has the potential to provide a 9x\ improvement for selects in column-stores.
In addition, we discuss both hardware and software challenges for using NDP in database systems as well as opportunities for further NDP accelerators to boost additional relational operators.\  }, author = {Sam (Likun) Xi and Oreoluwa Babarinsa and Athanassoulis, Manos and Stratos Idreos} } @conference {254186, title = {JAFAR: Near-Data Processing for Databases}, booktitle = {Proceedings of the ACM SIGMOD International Conference on Management of Data}, year = {2015}, address = {Melbourne, Australia}, abstract = {As main-memory sizes have grown, data systems have been able to process entire large-scale data-sets in memory. However, because memory speeds have not been keeping pace with CPU speeds, the cost of moving data into CPU caches has begun to dominate certain operations within in-memory data systems. Recent advances in hardware architectures point to near-memory computation capabilities becoming possible soon. This allows us to rethink how database systems process queries and how they split computation across the various computational units. In this paper, we present JAFAR, a near-data processing accelerator for pushing selects down to memory. Through a detailed simulation of JAFAR hardware we show it has the potential to provide up to 900\% improvement for select operations in modern column-stores.}, author = {Oreoluwa Babarinsa and Stratos Idreos} } @conference {254181, title = {One Loop Does Not Fit All}, booktitle = {Proceedings of the ACM SIGMOD International Conference on Management of Data}, year = {2015}, address = {Melbourne, Australia}, abstract = { Just-In-Time (JIT) compilation is increasingly becoming a key technology for modern database systems. It allows the creation of code on-the-fly to perfectly match an active query. In the past, it has been argued that a query should be compiled to a single loop that performs all query actions, for example, all selects over all relevant columns. On the other hand, vectorization {\textendash} a common feature in modern data systems {\textendash} allows for better results by evaluating the query predicates sequentially in different tight for-loops. In this paper, we study JIT compilation for modern in-memory column-stores in detail and we show that, contrary to the common belief that vectorization outweighs the benefits of having one loop, there are cases in which creating a single loop is actually the optimal solution. In fact, deciding between multiple or a single loop is not a static decision; instead, it depends on (per column) query selectivity. We perform our experiments on a modern column-store prototype that supports vectorization and we show that, depending on selectivity, a different code layout is optimal. When a select operator is implemented with a no-branch design, for low selectivity, creating multiple loops performs better than a single loop. A single tight loop performs better otherwise.\  }, author = {Styliani Pantella and Stratos Idreos} } @conference {254236, title = {Queriosity: Automated Data Exploration}, booktitle = {Proceedings of the IEEE International Congress on Big Data}, year = {2015}, month = {2015}, address = {New York, USA}, abstract = {Curiosity, a fundamental drive amongst higher living organisms, is what enables exploration, learning and creativity. In our increasingly data-driven world, data exploration, i.e., making sense of mounting haystacks of data, is akin to intelligence for science, business and individuals. 
However, modern data systems {\textendash} designed for data retrieval rather than exploration {\textendash} only let us retrieve data and ask if it is interesting. This makes knowledge discovery a game of trial and error that can only be orchestrated by expert data scientists. We present the vision toward Queriosity, an automated and personalized data exploration system. Designed on the principles of autonomy, learning and usability, Queriosity envisions a paradigm shift in data exploration and aims to become a personalized {\textquotedblleft}data robot{\textquotedblright} that provides a direct answer to what is interesting in a user{\textquoteright}s data set, instead of just retrieving data. Queriosity autonomously and continuously navigates toward interesting findings based on trends, statistical properties and interactive user feedback.}, author = {Wasay, Abdul and Athanassoulis, Manos and Stratos Idreos} } @article {242946, title = {DASlab: The Data Systems Laboratory at Harvard SEAS}, journal = {ACM SIGMOD Record}, year = {2015}, abstract = {DASlab is a new laboratory at the Harvard School of Engineering and Applied Sciences (SEAS).\ The lab was formed in January 2014 when Stratos Idreos joined Harvard SEAS.\ DASlab currently consists of 3 PhD students, 1 postdoctoral researcher and 9 undergraduate researchers\ while it is set to double its graduate student population in the next one to two years.\ The lab is part of a growing community of systems and computer science researchers at Harvard;\ computer science faculty is scheduled to grow by 50\% in the next few years. The main focus of DASlab is on designing data systems that\ (a) make it easy to extract knowledge out of increasingly diverse and growing data sets\ and (b) can stand the test of time.}, author = {Stratos Idreos} } @article {230751, title = {NoDB: Efficient Query Execution on Raw Data Files}, journal = {Communications of the ACM, Research Highlights}, year = {2015}, abstract = {As data collections become larger and larger, users are faced with increasing bottlenecks in their data analysis. More data means more time to prepare and to load the data into the database before executing the desired queries. Many applications already avoid using database systems, e.g., scientific data analysis and social networks, due to the complexity and the increased data-to-query time, i.e., the time between getting the data and retrieving its first useful results. For many applications data collections keep growing fast, even on a daily basis, and this data deluge will only increase in the future, where it is expected to have much more data than what we can move or store, let alone analyze. We here present the design and roadmap of a new paradigm in database systems, called NoDB, which do not require data loading while still maintaining the whole feature set of a modern database system. In particular, we show how to make raw data files a first-class citizen, fully integrated with the query engine. Through our design and lessons learned by implementing the NoDB philosophy over a modern DBMS, we discuss the fundamental limitations as well as the strong opportunities that such a research path brings. We identify performance bottlenecks specific for in situ processing, namely the repeated parsing and tokenizing overhead and the expensive data type conversion. 
To address these problems, we introduce an adaptive indexing mechanism that maintains positional information to provide efficient access to raw data files, together with a flexible caching structure. We conclude that NoDB systems are feasible to design and implement over modern DBMS, bringing an unprecedented positive effect in usability and performance.}, author = {Ioannis Alagiannis and Renata Borovica and Miguel Branco and Stratos Idreos and Anastasia Ailamaki} } @conference {230746, title = {Overview of Data Exploration Techniques}, booktitle = {Proceedings of the ACM SIGMOD International Conference on Management of Data, Tutorial}, year = {2015}, address = {Melbourne, Australia}, abstract = {Data exploration is about efficiently extracting knowledge from data even if we do not know exactly what we are looking for. This has numerous side-effects on (a) how we design database systems at their core, i.e., at the storage and query processing layers and (b) how users or applications interact with systems. In this tutorial, we survey recent developments in the emerging area of database systems tailored for data exploration. We discuss new ideas on how to store and access data as well as new ideas on how to interact with a data system to enable users and applications to quickly figure out which data parts are of interest. In addition, we discuss how to exploit lessons-learned from past research, the new challenges data exploration crafts, emerging applications and future directions.}, author = {Stratos Idreos and Olga Papaemmanouil and Surajit Chaudhuri} } @conference {223356, title = {Holistic Indexing in Main-memory Column-stores}, booktitle = {Proceedings of the ACM SIGMOD International Conference on Management of Data}, year = {2015}, address = {Melbourne, Australia}, abstract = {Great database systems performance relies heavily on index tuning, i.e., creating and utilizing the best indices depending on the workload. However, the complexity of the index tuning process has dramatically increased in recent years due to ad-hoc workloads and shortage of time and system resources to invest in tuning. This paper introduces holistic indexing, a new approach to automated index tuning in dynamic environments. Holistic indexing requires zero set-up and tuning effort, relying on adaptive index creation as a side-effect of query processing. Indices are created incrementally and partially; they are continuously refined as we process more and more queries. Holistic indexing takes the state-of-the-art adaptive indexing ideas a big step further by introducing the notion of a system which never stops refining the index space, taking educated decisions about which index we should incrementally refine next based on continuous knowledge acquisition about the running workload and resource utilization. When the system detects idle CPU cycles, it utilizes those extra cycles by refining the adaptive indices which are most likely to bring a benefit for future queries. Such idle CPU cycles occur when the system cannot exploit all available cores up to 100\%, i.e., either because the workload is not enough to saturate the CPUs or because the current tasks performed for query processing are not easy to parallelize to the point where all available CPU power is exploited. In this paper, we present the design of holistic indexing for column-oriented database architectures and we discuss a detailed analysis against parallel versions of state-of-the-art indexing and adaptive indexing approaches. 
Holistic indexing is implemented in an open-source column-store DBMS. Our detailed experiments on both synthetic and standard benchmarks (TPC-H) and workloads (SkyServer) demonstrate that holistic indexing brings significant performance gains by being able to continuously refine the physical design in parallel to query processing, exploiting any idle CPU resources.}, author = {Eleni Petraki and Stratos Idreos and Stefan Manegold} } @conference {218651, title = {Smooth Scan: Statistics-Oblivious Access Paths}, booktitle = {IEEE International Conference on Data Engineering (ICDE)}, year = {2015}, address = {Seoul, Korea}, abstract = { Query optimizers depend heavily on statistics representing column distributions to create efficient query plans. In many cases, though, statistics are outdated or non-existent, and the process of refreshing statistics is very expensive, especially for ad-hoc workloads on ever bigger data. This results in suboptimal plans that severely hurt performance. The main problem is that any decision, once made by the optimizer, is fixed throughout the execution of a query. In particular, each logical operator translates into a fixed choice of a physical operator at run-time. In this paper we advocate for continuous adaptation and morphing of physical operators throughout their lifetime, by adjusting their behavior in accordance with the statistical properties of the data. We demonstrate the benefits of the new paradigm by designing and implementing an adaptive access path operator called Smooth Scan, which morphs continuously within the space of traditional index access and full table scan. Smooth Scan behaves similarly to an index scan for low selectivity; if selectivity increases, however, Smooth Scan progressively morphs its behavior toward a sequential scan. As a result, a system with Smooth Scan requires no access path decisions up front nor does it need accurate statistics to provide good performance. We implement Smooth Scan in PostgreSQL and, using both synthetic benchmarks as well as TPC-H, we show that it achieves robust performance while at the same time being statistics-oblivious.\  }, author = {Renata Borovica and Stratos Idreos and Anastasia Ailamaki and Marcin Zukowski and Campbell Fraser} } @conference {170046, title = {Database Cracking: Fancy Scan, not Poor Man{\textquoteright}s Sort!}, booktitle = {Proceedings of the 10th International Workshop on Data Management on New Hardware (DaMoN)}, year = {2014}, address = {Snowbird, Utah}, abstract = {Database Cracking is an appealingly simple approach to adaptive indexing: on every range-selection query, the data is partitioned using the supplied predicates as pivots. The core of database cracking is, thus, pivoted partitioning. While pivoted partitioning, like scanning, requires a single pass through the data it tends to have much higher costs due to lower CPU efficiency. In this paper, we conduct an in-depth study of the reasons for the low CPU efficiency of pivoted partitioning. Based on the findings, we develop an optimized version with significantly higher (single-threaded) CPU efficiency. We also develop a number of multi-threaded implementations that are effectively bound by memory bandwidth. 
Combining all of these optimizations, we achieve an implementation that has costs close to or better than an ordinary scan on a variety of systems ranging from low-end (cheaper than $300) desktop machines to high-end (above $10,000) servers.}, author = {Holger Pirk and Eleni Petraki and Stratos Idreos and Stefan Manegold and Martin L. Kersten} } @article {170041, title = {Joins on Encoded and Partitioned Data}, journal = {Proceedings of the Very Large Databases Endowment (PVLDB)}, year = {2014}, abstract = { Compression has historically been used to reduce the cost of storage, I/Os from that storage, and buffer pool utilization, at the expense of the CPU required to decompress data every time it is queried. However, significant additional CPU efficiencies can be achieved by deferring decompression as late in query processing as possible and performing query processing operations directly on the still-compressed data. In this paper, we investigate the benefits and challenges of performing joins on compressed (or encoded) data. We demonstrate the benefit of independently optimizing the compression scheme of each join column, even though join predicates relating values from multiple columns may require translation of the encoding of one join column into the encoding of the other. We also show the benefit of compressing {\textquotedblleft}payload{\textquotedblright} data other than the join columns {\textquotedblleft}on the fly,{\textquotedblright} to minimize the size of hash tables used in the join. By partitioning the domain of each column and defining separate dictionaries for each partition, we can achieve even better overall compression as well as increased flexibility in dealing with new values introduced by updates. Instead of decompressing both join columns participating in a join to resolve their different compression schemes, our system performs a light-weight mapping of only qualifying rows from one of the join columns to the encoding space of the other at run time. Consequently, join predicates can be applied directly on the compressed data. We call this procedure encoding translation. Two alternatives of encoding translation are developed and compared in the paper. We provide a comprehensive evaluation of these alternatives using product implementations of each on the TPC-H data set, and demonstrate that performing joins on encoded and partitioned data achieves both superior performance and excellent compression.\  }, author = {Jae-Gil Lee and Gopi Attaluri and Ronald Barber and Naresh Chainani and Oliver Draese and Frederick Ho and Stratos Idreos and Min-Soo Kim and Sam Lightstone and Guy Lohman and Konstantinos Morfonios and Keshava Murthy and Ippokratis Pandis and Lin Qiao and Vijayshankar Raman and Vincent Kulandai Samy and Richard Sidle and Knut Stolze and Liping Zhang} } @article {163416, title = {Transactional support for adaptive indexing}, journal = {The International Journal on Very Large Databases (VLDBJ)}, volume = {23}, number = {2}, year = {2014}, pages = {303-328}, abstract = {Adaptive indexing initializes and optimizes indexes incrementally, as a side effect of query processing. The goal is to achieve the benefits of indexes while hiding or minimizing the costs of index creation. However, index-optimizing side effects seem to turn read-only queries into update transactions that might, for example, create lock contention. This paper studies concurrency control and recovery in the context of adaptive indexing. 
We show that the design and implementation of adaptive indexing rigorously separates index structures from index contents; this relaxes constraints and requirements during adaptive indexing compared to those of traditional index updates. Our design adapts to the fact that an adaptive index is refined continuously and exploits any concurrency opportunities in a dynamic way. A detailed experimental analysis demonstrates that (a) adaptive indexing maintains its adaptive properties even when running concurrent queries, (b) adaptive indexing can exploit the opportunity for parallelism due to concurrent queries, (c) the number of concurrency conflicts and any concurrency administration overheads follow an adaptive behavior, decreasing as the workload evolves and adapting to the workload needs.}, author = {Goetz Graefe and Felix Halim and Stratos Idreos and Harumi A. Kuno and Stefan Manegold and Bernhard Seeger} } @article {163411, title = {Distributed Large-Scale Information Filtering}, journal = {Transactions on Large-Scale Data- and Knowledge-Centered Systems XIII Lecture Notes in Computer Science}, volume = {13}, year = {2014}, pages = {91-122}, abstract = {We study the problem of distributed resource sharing in peer-to-peer networks and focus on the problem of information filtering. In our setting, subscriptions and publications are specified using an expressive attribute-value representation that supports both the Boolean and Vector Space models. We use an extension of the distributed hash table Chord to organise the nodes and store user subscriptions, and utilise efficient publication protocols that keep the network traffic and latency low at filtering time. To verify our approach, we evaluate the proposed protocols experimentally using thousands of nodes, millions of user subscriptions, and two different real-life corpora. We also study three important facets of the load-balancing problem in such a scenario and present a novel algorithm that manages to distribute the load evenly among the nodes. Our results show that the designed protocols are scalable and efficient: they achieve expressive information filtering functionality with low message traffic and latency.}, author = {Christos Tryfonopoulos and Stratos Idreos and Manolis Koubarakis and Paraskevi Raftopoulou} } @conference {163421, title = {H2O: A Hands-free Adaptive Store}, booktitle = {Proceedings of the ACM SIGMOD International Conference on Management of Data}, year = {2014}, address = {Snowbird, Utah}, abstract = {Modern state-of-the-art database systems are designed around a single data storage layout. This is a fixed decision that drives the whole design of the architecture of a database system, i.e., row-stores, column-stores, etc. However, none of those choices is a universally good solution; different workloads require different storage layouts and data access methods in order to achieve good performance. In this paper, we present the H2O system, which introduces two novel concepts. First, it is flexible to support multiple storage layouts and data access patterns in a single engine. Second, and most importantly, it decides on-the-fly, i.e., during query processing, which design is best for classes of queries and the respective data parts. At any given point in time, parts of the data might be materialized in various patterns purely depending on the query workload; as the workload changes and with every single query, the storage and access patterns continuously adapt. 
In this way, H2O makes no a priori and fixed decisions on how data should be stored, allowing each single query to enjoy a storage and access pattern which is tailored to its specific properties. We present a detailed analysis of H2O using both synthetic benchmarks and realistic scientific workloads such as the SDSS (SkyServer). We demonstrate that while existing systems cannot achieve maximum performance across all workloads, H2O can always match the best case performance without requiring any tuning or workload knowledge.}, author = {Ioannis Alagiannis and Stratos Idreos and Anastasia Ailamaki} } @conference {163426, title = {Indexing for Interactive Exploration of Big Data Series}, booktitle = {Proceedings of the ACM SIGMOD International Conference on Management of Data}, year = {2014}, address = {Snowbird, Utah}, abstract = {Numerous applications continuously produce big amounts of data series, and in several time critical scenarios analysts need to be able to query these data as soon as they become available, which is not currently possible with the state-of-the-art indexing methods and for very large data series collections. In this paper, we present the first adaptive indexing mechanism, specifically tailored to solve the problem of indexing and querying very large data series collections. The main idea is that instead of building the complete index over the complete data set up-front and querying only later, we interactively and adaptively build parts of the index, only for the parts of the data on which the users pose queries. The net effect is that instead of waiting for extended periods of time for the index creation, users can immediately start exploring the data series. We present a detailed design and evaluation of adaptive data series indexing over both synthetic data and real-world workloads. The results show that our approach can gracefully handle large data series collections, while drastically reducing the data to query delay: by the time state-of-the-art indexing techniques finish indexing 1 billion data series (and before answering even a single query), adaptive data series indexing has already answered 3*10^5 queries.}, author = {Konstantinos Zoumpatianos and Stratos Idreos and Themis Palpanas} } @conference {136991, title = {dbTouch in Action: Database Kernels for Touch-based Data Exploration}, booktitle = {Proceedings of the 30th IEEE International Conference on Data Engineering (ICDE)}, year = {2014}, address = {Chicago}, abstract = {A fundamental need in the era of data deluge is data exploration through interactive tools, i.e., being able to quickly determine data and patterns of interest. dbTouch is a new research direction towards a next generation of data management systems that inherently support data exploration by allowing touch-based interaction. Data is represented in a visual format, while users can touch those shapes and interact/query with gestures. In a dbTouch system, the whole database kernel is geared towards quick responses in touch input; the user drives query processing (not just query construction) via touch gestures, dictating how fast or slow data flows through query plans and which data parts are processed at any time. 
dbTouch translates the gestures into interactive database operators, reacting continuously to the touch input and analytics tasks given by the user in real-time such as sliding a finger over a column to scan it progressively; zoom in with two fingers over a column to progressively get sample data; rotate a table to change the physical design from row-store to column-store, etc. This demo presents the first dbTouch prototype over iOS for iPad.}, author = {Erietta Liarou and Stratos Idreos} } @inbook {136961, title = {Big Data Exploration}, booktitle = {Big Data Computing}, year = {2013}, publisher = {Taylor and Francis}, organization = {Taylor and Francis}, abstract = {We are now entering the era of data deluge, where the amount of data outgrows the capabilities of query processing technology. Many emerging applications, from social networks to scientific experiments, are representative examples of this deluge, where the rate at which data is produced exceeds any past experience. For example, scientific analysis such as astronomy is soon expected to collect multiple Terabytes of data on a daily basis, while already web-based businesses such as social networks or web log analysis are confronted with a growing stream of large data inputs. Therefore, there is a clear need for efficient big data query processing to enable the evolution of businesses and sciences to the new era of data deluge. In this chapter, we focus on a new direction of query processing for big data where data exploration becomes a first class citizen. Data exploration is necessary when new big chunks of data arrive rapidly and we want to react quickly, i.e., with little time to spare for tuning and set-up. In particular, our discussion focuses on database systems technology, which for several decades has been the predominant data processing tool. In this chapter, we introduce the concept of data exploration and we discuss a series of early techniques from the database community towards the direction of building database systems which are tailored for big data exploration, i.e., adaptive indexing, adaptive loading and sampling-based query processing. These directions focus on reconsidering fundamental assumptions and on designing next generation database architectures for the big data era. }, author = {Stratos Idreos} } @conference {DBLP:conf/cidr/IdreosL13, title = {dbTouch: Analytics at your Fingertips}, booktitle = {Proceedings of the 7th International Conference on Innovative Data Systems Research (CIDR)}, year = {2013}, address = {Asilomar, California}, abstract = {As we enter the era of data deluge, turning data into knowledge has become the major challenge across most sciences and businesses that deal with data. In addition, as we increase our ability to create data, more and more people are confronted with data management problems on a daily basis for numerous aspects of every day life. A fundamental need is data exploration through interactive tools, i.e., being able to quickly and effortlessly determine data and patterns of interest. However, modern database systems have not been designed with data exploration and usability in mind; they require users with expert knowledge and skills, while they react in a strict and monolithic way to every user request, resulting in correct answers but slow response times. 
In this paper, we introduce the vision of a new generation of data management systems, called dbTouch; our vision is to enable interactive and intuitive data exploration via database kernels which are tailored for touch-based exploration. No expert knowledge is needed. Data is represented in a visual format, e.g., a column shape for an attribute or a fat rectangle shape for a table, while users can touch those shapes and interact/query with gestures as opposed to firing complex SQL queries. The system does not try to consume all data; instead it analyzes only parts of the data at a time, continuously refining the answers and continuously reacting to user input. Every single touch on a data object can be seen as a request to run an operator or a collection of operators over part of the data. Users react to running results and continuously adjust the data exploration {\textendash} they continuously determine the data to be processed next by adjusting the direction and speed of a gesture, i.e., a collection of touches; the database system does not have control over the data flow anymore. We discuss the various benefits that dbTouch systems bring for data analytics as well as the new and unique challenges for database research in combination with touch interfaces. In addition, we provide an initial architecture, implementation and evaluation (and demo) of a dbTouch prototype over iOS for iPad. }, author = {Stratos Idreos and Erietta Liarou} } @conference {DBLP:conf/edbt/LiarouIMK13, title = {Enhanced Stream Processing in a DBMS Kernel}, booktitle = {Proceedings of the 16th International Conference on Extending Database Technology (EDBT)}, year = {2013}, pages = {501-512}, address = {Genoa, Italy}, abstract = {Continuous query processing has emerged as a promising query processing paradigm with numerous applications. A recent development is the need to handle both streaming queries and typical one-time queries in the same application. For example, data warehousing can greatly benefit from the integration of stream semantics, i.e., online analysis of incoming data and combination with existing data. This is especially useful to provide low latency in data-intensive analysis in big data warehouses that are augmented with new data on a daily basis. However, state-of-the-art database technology cannot handle streams efficiently due to their {\textquoteleft}{\textquoteleft}continuous" nature. At the same time, state-of-the-art stream technology is purely focused on stream applications. The research efforts are mostly geared towards the creation of specialized stream management systems built with a different philosophy than a DBMS. The drawback of this approach is the limited opportunities to exploit successful past data processing technology, e.g., query optimization techniques. For this new problem, we need to combine the best of both worlds. Here we take a completely different route by designing a stream engine on top of an existing relational database kernel. This includes reuse of both its storage/execution engine and its optimizer infrastructure. The major challenge then becomes the efficient support for specialized stream features. This paper focuses on incremental window-based processing, arguably the most crucial stream-specific requirement. In order to maintain and reuse the generic storage and execution model of the DBMS, we elevate the problem to the query plan level. 
Proper optimizer rules, scheduling and intermediate result caching and reuse, allow us to modify the DBMS query plans for efficient incremental processing. We describe in detail the new approach and we demonstrate efficient performance even against specialized stream engines, especially when scalability becomes a crucial factor}, author = {Erietta Liarou and Stratos Idreos and Stefan Manegold and Martin L. Kersten} } @article {136711, title = {The Design and Implementation of Modern Column-Oriented Database Systems}, journal = {Foundations and Trends in Databases}, volume = {5}, number = {3}, year = {2013}, pages = {197-280}, abstract = {In this article, we survey recent research on column-oriented database systems, or column-stores, where each attribute of a table is stored in a separate file or region on storage. Such databases have seen a resurgence in recent years with a rise in interest in analytic queries that perform scans and aggregates over large portions of a few columns of a table. The main advantage of a column-store is that it can access just the columns needed to answer such queries. We specifically focus on three influential research prototypes, MonetDB, VectorWise, and C-Store. These systems have formed the basis for several well-known commercial column-store implementations. We describe their similarities and differences and discuss their specific architectural features for compression, late materialization, join processing, vectorization and adaptive indexing (database cracking).}, author = {Daniel Abadi and Peter Boncz and Stavros Harizopoulos and Stratos Idreos and Samuel Madden} } @conference {DBLP:conf/edbt/IdreosMG12, title = {Adaptive indexing in modern database kernels}, booktitle = {Proceedings of the 15th International Conference on Extending Database Technology (EDBT)}, year = {2012}, pages = {566-569}, address = {Berlin, Germany}, abstract = {Physical design represents one of the hardest problems for database management systems. Without proper tuning, systems cannot achieve good performance. Offline indexing creates indexes a priori assuming good workload knowledge and idle time. More recently, online indexing monitors the workload trends and creates or drops indexes online. Adaptive indexing takes another step towards completely automating the tuning process of a database system, by enabling incremental and partial online indexing. The main idea is that physical design changes continuously, adaptively, partially, incrementally and on demand while processing queries as part of the execution operators. As such it brings a plethora of opportunities for rethinking and improving every single corner of database system design. We will analyze the indexing space between offline, online and adaptive indexing through several state of the art indexing techniques, e. g., what-if analysis and soft indexes. We will discuss in detail adaptive indexing techniques such as database cracking, adaptive merging, sideways cracking and various hybrids that try to balance the online tuning overhead with the convergence speed to optimal performance. In addition, we will discuss how various aspects of modern techniques for database architectures, such as vectorization, bulk processing, column-store execution and storage affect adaptive indexing. 
Finally, we will discuss several open research topics towards fully autonomous database kernels.}, author = {Stratos Idreos and Stefan Manegold and Goetz Graefe} } @article {DBLP:journals/ercim/Idreos12, title = {Cracking Big Data}, journal = {ERCIM News. Special theme: Big Data}, year = {2012}, url = {http://ercim-news.ercim.eu/en89/special/cracking-big-data}, author = {Stratos Idreos} } @article {DBLP:journals/debu/IdreosGNMMK12, title = {MonetDB: Two Decades of Research in Column-oriented Database Architectures}, journal = {IEEE Data Engineering Bulletin}, volume = {35}, number = {1}, year = {2012}, pages = {40-45}, abstract = {MonetDB is a state-of-the-art open-source column-store database management system targeting applications in need for analytics over large collections of data. MonetDB is actively used nowadays in health care, in telecommunications as well as in scientific databases and in data management research, accumulating on average more than 10,000 downloads on a monthly basis. This paper gives a brief overview of the MonetDB technology as it developed over the past two decades and the main research highlights which drive the current MonetDB design and form the basis for its future evolution.}, author = {Stratos Idreos and Fabian Groffen and Niels Nes and Stefan Manegold and K. Sjoerd Mullender and Martin L. Kersten} } @article {DBLP:journals/pvldb/LiarouIMK12, title = {MonetDB/DataCell: Online Analytics in a Streaming Column-Store}, journal = {Proceedings of the Very Large Databases Endowment (PVLDB)}, volume = {5}, number = {12}, year = {2012}, pages = {1910-1913}, abstract = {In DataCell, we design streaming functionalities in a modern relational database kernel which targets big data analytics. This includes exploitation of both its storage/execution engine and its optimizer infrastructure. We investigate the opportunities and challenges that arise with such a direction and we show that it carries significant advantages for modern applications in need for online analytics such as web logs, network monitoring and scientific data management. The major challenge then becomes the efficient support for specialized stream features, e.g., multi-query processing and incremental window-based processing as well as exploiting standard DBMS functionalities in a streaming environment such as indexing. This demo presents DataCell, an extension of the MonetDB open-source column-store for online analytics. The demo gives users the opportunity to experience the features of DataCell such as processing both stream and persistent data and performing window based processing. The demo provides a visual interface to monitor the critical system components, e.g., how query plans transform from typical DBMS query plans to online query plans, how data flows through the query plans as the streams evolve, how DataCell maintains intermediate results in columnar form to avoid repeated evaluation of the same stream portions, etc. The demo also provides the ability to interactively set the test scenarios and various DataCell knobs.}, author = {Erietta Liarou and Stratos Idreos and Stefan Manegold and Martin L. Kersten} } @conference {DBLP:conf/sigmod/AlagiannisBBIA12, title = {NoDB: efficient query execution on raw data files}, booktitle = {Proceedings of the ACM SIGMOD International Conference on Management of Data}, year = {2012}, pages = {241-252}, address = {Scottsdale, Arizona}, abstract = {As data collections become larger and larger, data loading evolves to a major bottleneck. 
Many applications already avoid using database systems, e.g., scientific data analysis and social networks, due to the complexity and the increased data-to-query time. For such applications data collections keep growing fast, even on a daily basis, and we are already in the era of data deluge where we have much more data than what we can move, store, let alone analyze. Our contribution in this paper is the design and roadmap of a new paradigm in database systems, called NoDB, which do not require data loading while still maintaining the whole feature set of a modern database system. In particular, we show how to make raw data files a first-class citizen, fully integrated with the query engine. Through our design and lessons learned by implementing the NoDB philosophy over a modern DBMS, we discuss the fundamental limitations as well as the strong opportunities that such a research path brings. We identify performance bottlenecks specific for in situ processing, namely the repeated parsing and tokenizing overhead and the expensive data type conversion costs. To address these problems, we introduce an adaptive indexing mechanism that maintains positional information to provide efficient access to raw data files, together with a flexible caching structure. Our implementation over PostgreSQL, called PostgresRaw, is able to avoid the loading cost completely, while matching the query performance of plain PostgreSQL and even outperforming it in many cases. We conclude that NoDB systems are feasible to design and implement over modern database architectures, bringing an unprecedented positive effect in usability and performance.}, author = {Ioannis Alagiannis and Renata Borovica and Miguel Branco and Stratos Idreos and Anastasia Ailamaki} } @article {DBLP:journals/pvldb/AlagiannisBBIA12, title = {NoDB in Action: Adaptive Query Processing on Raw Data}, journal = {Proceedings of the Very Large Databases Endowment (PVLDB)}, volume = {5}, number = {12}, year = {2012}, pages = {1942-1945}, abstract = {As data collections become larger and larger, users are faced with increasing bottlenecks in their data analysis. More data means more time to prepare the data, to load the data into the database and to execute the desired queries. Many applications already avoid using traditional database systems, e.g., scientific data analysis and social networks, due to their complexity and the increased data-to-query time, i.e. the time between getting the data and retrieving its first useful results. For many applications data collections keep growing fast, even on a daily basis, and this data deluge will only increase in the future, where it is expected to have much more data than what we can move or store, let alone analyze. In this demonstration, we will showcase a new philosophy for designing database systems called NoDB. NoDB aims at minimizing the data-to-query time, most prominently by removing the need to load data before launching queries. We will present our prototype implementation, PostgresRaw, built on top of PostgreSQL, which allows for efficient query execution over raw data files with zero initialization overhead. 
We will visually demonstrate how PostgresRaw incrementally and adaptively touches, parses, caches and indexes raw data files autonomously and exclusively as a side-effect of user queries.}, author = {Ioannis Alagiannis and Renata Borovica and Miguel Branco and Stratos Idreos and Anastasia Ailamaki} } @article {DBLP:journals/pvldb/HalimIKY12, title = {Stochastic Database Cracking: Towards Robust Adaptive Indexing in Main-Memory Column-Stores}, journal = {Proceedings of the Very Large Databases Endowment (PVLDB)}, volume = {5}, number = {6}, year = {2012}, pages = {502-513}, abstract = {Modern business applications and scientific databases call for inherently dynamic data storage environments. Such environments are characterized by two challenging features: (a) they have little idle system time to devote on physical design; and (b) there is little, if any, a priori workload knowledge, while the query and data workload keeps changing dynamically. In such environments, traditional approaches to index building and maintenance cannot apply. Database cracking has been proposed as a solution that allows on-the-fly physical data reorganization, as a collateral effect of query processing. Cracking aims to continuously and automatically adapt indexes to the workload at hand, without human intervention. Indexes are built incrementally, adaptively, and on demand. Nevertheless, as we show, existing adaptive indexing methods fail to deliver workload-robustness; they perform much better with random workloads than with others. This frailty derives from the inelasticity with which these approaches interpret each query as a hint on how data should be stored. Current cracking schemes blindly reorganize the data within each query{\textquoteright}s range, even if that results into successive expensive operations with minimal indexing benefit. In this paper, we introduce stochastic cracking, a significantly more resilient approach to adaptive indexing. Stochastic cracking also uses each query as a hint on how to reorganize data, but not blindly so; it gains resilience and avoids performance bottlenecks by deliberately applying certain arbitrary choices in its decision-making. Thereby, we bring adaptive indexing forward to a mature formulation that confers the workload-robustness previous approaches lacked. Our extensive experimental study verifies that stochastic cracking maintains the desired properties of original database cracking while at the same time it performs well with diverse realistic workloads.}, author = {Felix Halim and Stratos Idreos and Panagiotis Karras and Roland H. C. Yap} } @article {DBLP:journals/pvldb/GraefeHIKM12, title = {Concurrency Control for Adaptive Indexing}, journal = {Proceedings of the Very Large Databases Endowment (PVLDB)}, volume = {5}, year = {2012}, pages = {656-667}, abstract = {Adaptive indexing initializes and optimizes indexes incrementally, as a side effect of query processing. The goal is to achieve the benefits of indexes while hiding or minimizing the costs of index creation. However, index-optimizing side effects seem to turn read-only queries into update transactions that might, for example, create lock contention. This paper studies concurrency control in the context of adaptive indexing. We show that the design and implementation of adaptive indexing rigorously separates index structures from index contents; this relaxes the constraints and requirements during adaptive indexing compared to those of traditional index updates. 
Our design adapts to the fact that an adaptive index is refined continuously, and exploits any concurrency opportunities in a dynamic way. A detailed experimental analysis demonstrates that (a) adaptive indexing maintains its adaptive properties even when running concurrent queries, (b) adaptive indexing can exploit the opportunity for parallelism due to concurrent queries, (c) the number of concurrency conflicts and any concurrency administration overheads follow an adaptive behavior, decreasing as the workload evolves and adapting to the workload needs.}, author = {Goetz Graefe and Felix Halim and Stratos Idreos and Harumi A. Kuno and Stefan Manegold} } @article {DBLP:journals/debu/BarberBCDHHIKKLLLMMMPQRSSS12, title = {Business Analytics in (a) Blink}, journal = {IEEE Data Engineering Bulletin}, volume = {35}, number = {1}, year = {2012}, pages = {9-14}, abstract = {The Blink project{\textquoteright}s ambitious goal is to answer all Business Intelligence (BI) queries in mere seconds, regardless of the database size, with an extremely low total cost of ownership. Blink is a new DBMS aimed primarily at read-mostly BI query processing that exploits scale-out of commodity multi-core processors and cheap DRAM to retain a (copy of a) data mart completely in main memory. Additionally, it exploits proprietary compression technology and cache-conscious algorithms that reduce memory bandwidth consumption and allow most SQL query processing to be performed on the compressed data. Blink always scans (portions of) the data mart in parallel on all nodes, without using any indexes or materialized views, and without any query optimizer to choose among them. The Blink technology has thus far been incorporated into two IBM accelerator products generally available since March 2011. We are now working on the next generation of Blink, which will significantly expand the {\textquotedblleft}sweet spot{\textquotedblright} of the Blink technology to much larger, disk-based warehouses and allow Blink to {\textquotedblleft}own{\textquotedblright} the data, rather than copies of it.}, author = {Ronald Barber and Peter Bendel and Marco Czech and Oliver Draese and Frederick Ho and Namik Hrle and Stratos Idreos and Min-Soo Kim and Oliver Koeth and Jae-Gil Lee and Tianchao Tim Li and Guy Lohman and Konstantinos Morfonios and Ren{\'e} M{\"u}ller and Keshava Murthy and Ippokratis Pandis and Lin Qiao and Vijayshankar Raman and Richard Sidle and Knut Stolze and Sandor Szabo} } @conference {137046, title = {Too Many Links in the Horizon; What is Next? Linked Views and Linked History}, booktitle = {Proceedings of the 10th International Semantic Web Conference (ISWC)}, year = {2011}, address = {Bonn, Germany}, abstract = {The trend for more online linked data becomes stronger. Foreseeing a future where {\textquotedblleft}everything{\textquotedblright} will be online and linked, we ask the critical question; what is next? We envision that managing, querying and storing large amounts of links and data is far from yet another query processing task. We highlight two distinct and promising research directions towards managing and making sense of linked data. We introduce linked views to help focusing on specific link and data instances and linked history to help observe how links and data change over time.} } @conference {DBLP:conf/cidr/IdreosAJA11, title = {Here are my Data Files. Here are my Queries. 
Where are my Results?}, booktitle = {Proceedings of the 5th International Conference on Innovative Data Systems Research (CIDR)}, year = {2011}, pages = {57-68}, address = {Asilomar, California}, abstract = {Database management systems (DBMS) provide incredible flexibility and performance when it comes to query processing, scalability and accuracy. To fully exploit DBMS features, however, the user must define a schema, load the data, tune the system for the expected workload, and answer several questions. Should the database use a column-store, a row-store or some hybrid format? What indices should be created? All these questions make for a formidable and time-consuming hurdle, often deterring new applications or imposing high cost to existing ones. A characteristic example is that of scientific databases with huge data sets. The prohibitive initialization cost and complexity still forces scientists to rely on {\textquoteleft}{\textquoteleft}ancient" tools for their data management tasks, delaying scientific understanding and progress. Users and applications collect their data in flat files, which have traditionally been considered to be {\textquoteleft}{\textquoteleft}outside" a DBMS. A DBMS wants control: always bring all data {\textquoteleft}{\textquoteleft}inside", replicate it and format it in its own {\textquoteleft}{\textquoteleft}secret" way. The problem has been recognized and current efforts extend existing systems with abilities such as reading information from flat files and gracefully incorporating it into the processing engine. This paper proposes a new generation of systems where the only requirement from the user is a link to the raw data files. Queries can then immediately be fired without preparation steps in between. Internally and in an abstract way, the system takes care of selectively, adaptively and incrementally providing the proper environment given the queries at hand. Only part of the data is loaded at any given time and it is being stored and accessed in the format suitable for the current workload.}, author = {Stratos Idreos and Ioannis Alagiannis and Ryan Johnson and Anastasia Ailamaki} } @article {DBLP:journals/sigmod/BonnetMBCGGHIIJKKMOPRTYFS11, title = {Repeatability and workability evaluation of SIGMOD 2011}, journal = {SIGMOD Record}, volume = {40}, year = {2011}, pages = {45-48}, abstract = {SIGMOD has offered, since 2008, to verify the experiments published in the papers accepted at the conference. This year, we have been in charge of reproducing the experiments provided by the authors (repeatability), and exploring changes to experiment parameters (workability). In this paper, we assess the SIGMOD repeatability process in terms of participation, review process and results. While the participation is stable in terms of number of submissions, we find this year a sharp contrast between the high participation from Asian authors and the low participation from American authors. We also find that most experiments are distributed as Linux packages accompanied by instructions on how to setup and run the experiments. We are still far from the vision of executable papers.}, author = {Philippe Bonnet and Stefan Manegold and Matias Bj{\o}rling and Wei Cao and Javier Gonzalez and Joel A. 
Granados and Nancy Hall and Stratos Idreos and Milena Ivanova and Ryan Johnson and David Koop and Tim Kraska and Ren{\'e} M{\"u}ller and Dan Olteanu and Paolo Papotti and Christine Reilly and Dimitris Tsirogiannis and Cong Yu and Juliana Freire and Dennis Shasha} } @article {DBLP:journals/pvldb/KerstenIML11, title = {The Researcher{\textquoteright}s Guide to the Data Deluge: Querying a Scientific Database in Just a Few Seconds}, journal = {Proceedings of the Very Large Databases Endowment (PVLDB)}, volume = {4}, number = {12}, year = {2011}, pages = {1474-1477}, abstract = {There is a clear need nowadays for extremely large data processing. This is especially true in the area of scientific data management where soon we expect data inputs in the order of multiple Petabytes. However, current data management technology is not suitable for such data sizes. In the light of such new database applications, we can rethink some of the strict requirements database systems adopted in the past. We argue that correctness is one such requirement, responsible for performance degradation. In this paper, we propose a new paradigm towards building database kernels that may produce wrong but fast, cheap and indicative results. Fast response times are an essential component of data analysis for exploratory applications; allowing for fast queries enables the user to develop a {\textquoteleft}{\textquoteleft}feeling" for the data through a series of {\textquoteleft}{\textquoteleft}painless" queries which eventually leads to more detailed analysis in a targeted data area. We propose a research path where a database kernel autonomously and on-the-fly decides to reduce the processing requirements of a running query based on workload, hardware and environmental parameters. It requires a complete redesign of database operators and query processing strategy. For example, typical and very common scenarios where query processing performance degrades significantly are cases where a database operator has to spill data to disk, or is forced to perform random access, or has to follow long linked lists, etc. Here we ask the question: What if we simply avoid these steps, {\textquoteleft}{\textquoteleft}ignoring" the side-effect in the correctness of the result?}, author = {Martin L. Kersten and Stratos Idreos and Stefan Manegold and Erietta Liarou} } @article {DBLP:journals/pvldb/IdreosMKG11, title = {Merging What{\textquoteright}s Cracked, Cracking What{\textquoteright}s Merged: Adaptive Indexing in Main-Memory Column-Stores}, journal = {Proceedings of the Very Large Databases Endowment (PVLDB)}, volume = {4}, year = {2011}, pages = {585-597}, abstract = {Adaptive indexing is characterized by the partial creation and refinement of the index as side effects of query execution. Dynamic or shifting workloads may benefit from preliminary index structures focused on the columns and specific key ranges actually queried --- without incurring the cost of full index construction. The costs and benefits of adaptive indexing techniques should therefore be compared in terms of initialization costs, the overhead imposed upon queries, and the rate at which the index converges to a state that is fully-refined for a particular workload component. Based on an examination of database cracking and adaptive merging, which are two techniques for adaptive indexing, we seek a hybrid technique that has a low initialization cost and also converges rapidly. 
We find the strengths and weaknesses of database cracking and adaptive merging complementary. One has a relatively high initialization cost but converges rapidly. The other has a low initialization cost but converges relatively slowly. We analyze the sources of their respective strengths and explore the space of hybrid techniques. We have designed and implemented a family of hybrid algorithms in the context of a column-store database system. Our experiments compare their behavior against database cracking and adaptive merging, as well as against both traditional full index lookup and scan of unordered data. We show that the new hybrids significantly improve over past methods while at least two of the hybrids come very close to the {\textquoteleft}{\textquoteleft}ideal performance{\textquoteright}{\textquoteright} in terms of both overhead per query and convergence to a final state.}, author = {Stratos Idreos and Stefan Manegold and Harumi A. Kuno and Goetz Graefe} } @conference {DBLP:conf/birte/BarberBCDHHIKKLLLMMMPQRSSS11, title = {Blink: Not Your Father{\textquoteright}s Database!}, booktitle = {Proceedings of the 5th International Workshop on Business Intelligence for the Real-Time Enterprises (BIRTE)}, year = {2011}, pages = {1-22}, abstract = {The Blink project{\textquoteright}s ambitious goals are to answer all Business Intelligence (BI) queries in mere seconds, regardless of the database size, with an extremely low total cost of ownership. It takes a very innovative and counter-intuitive approach to processing BI queries, one that exploits several disruptive hardware and software technology trends. Specifically, it is a new, workload-optimized DBMS aimed primarily at BI query processing, and exploits scale-out of commodity multi-core processors and cheap DRAM to retain a (copy of a) data mart completely in main memory. Additionally, it exploits proprietary compression technology and cache-conscious algorithms that reduce memory bandwidth consumption and allow most SQL query processing to be performed on the compressed data. Ignoring the general wisdom of the last three decades that the only way to scalably search large databases is with indexes, Blink always performs simple, {\textquotedblleft}brute force{\textquotedblright} scans of the entire data mart in parallel on all nodes, without using any indexes or materialized views, and without any query optimizer to choose among them. The Blink technology has thus far been incorporated into two products: (1) an accelerator appliance product for DB2 for z/OS (on the {\textquotedblleft}mainframe{\textquotedblright}), called the IBM Smart Analytics Optimizer for DB2 for z/OS, V1.1, which was generally available in November 2010; and (2) the Informix Warehouse Accelerator (IWA), a software-only version that was generally available in March 2011. 
We are now working on the next generation of Blink, called BLink Ultra, or BLU, which will significantly expand the {\textquotedblleft}sweet spot{\textquotedblright} of Blink technology to much larger, disk-based warehouses and allow BLU to {\textquotedblleft}own{\textquotedblright} the data, rather than copies of it.}, author = {Ronald Barber and Peter Bendel and Marco Czech and Oliver Draese and Frederick Ho and Namik Hrle and Stratos Idreos and Min-Soo Kim and Oliver Koeth and Jae-Gil Lee and Tianchao Tim Li and Guy Lohman and Konstantinos Morfonios and Ren{\'e} M{\"u}ller and Keshava Murthy and Ippokratis Pandis and Lin Qiao and Vijayshankar Raman and Sandor Szabo and Richard Sidle and Knut Stolze} } @mastersthesis {136971, title = {Database Cracking: Towards Auto-tuning Database Kernels}, year = {2010}, type = {PhD Thesis}, abstract = {Indices are heavily used in database systems in order to achieve the ultimate query processing performance. It takes a lot of time to create an index and the system needs to reserve extra storage space to store the auxiliary data structure. When updates arrive, there is also the overhead of maintaining the index. Thus, which indices to create and when to create them has been, and still is, one of the most important research topics of the last decades. If the workload is known up-front or can be predicted, and if there is enough idle time to spare, then we can a priori create all necessary indices and exploit them when queries arrive. But what happens if we do not have this knowledge or idle time? Similarly, what happens if the workload changes often, suddenly and in an unpredictable way? Even if we can correctly analyze the current workload, it may well be that by the time we finish our analysis and create all necessary indices, the workload pattern has changed. Here we argue that a database system should just be given the data and queries in a declarative way and the system should internally take care of finding not only the proper algorithms and query plans but also the proper physical design to match the workload and application needs. The goal is to remove the role of database administrators, leading to systems that can completely automatically self-tune and adapt even to dynamic environments. Database Cracking implements the first adaptive kernel that automatically adapts to the access patterns by selectively and adaptively optimizing the data set purely for the workload at hand. It continuously reorganizes input data on-the-fly as a side-effect of query processing, using queries as advice on how data should be stored. Everything happens within operator calls during query processing and brings knowledge to the system that future operators in future queries can exploit. Essentially, the necessary indices are built incrementally as the system gains more and more knowledge about the workload needs.}, author = {Stratos Idreos} } @conference {DBLP:conf/tpctc/GraefeIKM10, title = {Benchmarking Adaptive Indexing}, booktitle = {Proceedings of the 2nd TPC Technology Conference on Performance Evaluation \& Benchmarking (TPCTC)}, year = {2010}, pages = {169-184}, address = {Singapore}, abstract = {Ideally, realizing the best physical design for the current and all subsequent workloads would impact neither performance nor storage usage. In reality, workloads and datasets can change dramatically over time, and index creation impacts the performance of concurrent user and system activity.
We propose a framework that evaluates the key premise of adaptive indexing -- a new indexing paradigm where index creation and re-organization take place automatically and incrementally, as a side-effect of query execution. We focus on how the incremental costs and benefits of dynamic reorganization are distributed across the workload{\textquoteright}s lifetime. We believe that measuring the costs and utility of the stages of adaptation provides relevant metrics for evaluating new query processing paradigms and comparing them to traditional approaches.}, author = {Goetz Graefe and Stratos Idreos and Harumi A. Kuno and Stefan Manegold} } @conference {DBLP:conf/icde/IdreosKNR10, title = {Estimating the compression fraction of an index using sampling}, booktitle = {Proceedings of the 26th IEEE International Conference on Data Engineering (ICDE)}, year = {2010}, pages = {441-444}, address = {Long Beach, California}, abstract = {Data compression techniques such as null suppression and dictionary compression are commonly used in today{\textquoteright}s database systems. In order to effectively leverage compression, it is necessary to have the ability to efficiently and accurately estimate the size of an index if it were to be compressed. Such an analysis is critical if automated physical design tools are to be extended to handle compression. Several database systems today provide estimators for this problem based on random sampling. While this approach is efficient, there is no previous work that analyses its accuracy. In this paper, we analyse the problem of estimating the compressed size of an index from the point of view of worst-case guarantees. We show that the simple estimator implemented by several database systems has several {\textquotedblleft}good{\textquotedblright} cases even though the estimator itself is agnostic to the internals of the specific compression algorithm.}, author = {Stratos Idreos and Raghav Kaushik and Vivek R. Narasayya and Ravishankar Ramamurthy} } @conference {DBLP:conf/edbt/LiarouGI09, title = {Exploiting the power of relational databases for efficient stream processing}, booktitle = {Proceedings of the 12th International Conference on Extending Database Technology (EDBT)}, year = {2009}, pages = {323-334}, address = {Saint-Petersburg, Russia}, abstract = {Stream applications have gained significant popularity over the last years, which has led to the development of specialized stream engines. These systems are designed from scratch with a different philosophy than today{\textquoteright}s database engines in order to cope with the requirements of stream applications. However, this means that they lack the power and sophisticated techniques of a full-fledged database system that exploits techniques and algorithms accumulated over many years of database research. In this paper, we take the opposite route and design a stream engine directly on top of a database kernel. Incoming tuples are directly stored upon arrival in a new kind of system tables, called baskets. A continuous query can then be evaluated over its relevant baskets as a typical one-time query, exploiting the power of the relational engine. Once a tuple has been seen by all relevant queries/operators, it is dropped from its basket. A basket can be the input to a single or multiple similar query plans. Furthermore, a query plan can be split into multiple parts, each one with its own input/output baskets, allowing for flexible load-sharing query scheduling.
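
As a rough illustration of this basket model (our sketch, not DataCell code; Basket, drain, and the price filter are hypothetical names), a basket simply accumulates incoming tuples, a continuous query is run over it as an ordinary one-time query once a batch threshold is reached, and the consumed tuples are then dropped.

    # Illustrative sketch of basket-based stream processing: a basket is just
    # a table; a continuous query runs over it as a one-time query per batch.

    class Basket:
        def __init__(self, batch_size):
            self.rows, self.batch_size = [], batch_size

        def insert(self, row):
            self.rows.append(row)
            return len(self.rows) >= self.batch_size   # time to evaluate?

        def drain(self):
            batch, self.rows = self.rows, []           # drop tuples once seen
            return batch

    basket = Basket(batch_size=3)
    query = lambda rows: [r for r in rows if r["price"] > 10]  # hypothetical filter

    for tup in [{"price": 5}, {"price": 12}, {"price": 30}, {"price": 7}]:
        if basket.insert(tup):
            print(query(basket.drain()))   # batch evaluation over the basket
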
Contrary to traditional stream engines, that process one tuple at a time, this model allows batch processing of tuples, e.g., query a basket only after x tuples arrive or after a time threshold has passed. Furthermore, we are not restricted to process tuples in the order they arrive. Instead, we can selectively pick tuples from a basket based on the query requirements exploiting a novel query component, the basket expressions. We investigate the opportunities and challenges that arise with such a direction and we show that it carries significant advantages. We propose a complete architecture, the DataCell, which we implemented on top of an open-source column-oriented DBMS. A detailed analysis and experimental evaluation of the core algorithms using both micro benchmarks and the standard Linear Road benchmark demonstrate the potential of this new approach.}, author = {Erietta Liarou and Romulo Goncalves and Stratos Idreos} } @conference {DBLP:conf/sigmod/IdreosKM09, title = {Self-organizing tuple reconstruction in column-stores}, booktitle = {Proceedings of the 29th ACM SIGMOD International Conference on Management of Data}, year = {2009}, pages = {297-308}, address = {Providence, Rhode Island}, abstract = {Column-stores gained popularity as a promising physical design alternative. Each attribute of a relation is physically stored as a separate column allowing queries to load only the required attributes. The overhead incurred is on-the-fly tuple reconstruction for multi-attribute queries. Each tuple reconstruction is a join of two columns based on tuple IDs, making it a significant cost component. The ultimate physical design is to have multiple presorted copies of each base table such that tuples are already appropriately organized in multiple different orders across the various columns. This requires the ability to predict the workload, idle time to prepare, and infrequent updates. In this paper, we propose a novel design, partial sideways cracking, that minimizes the tuple reconstruction cost in a self-organizing way. It achieves performance similar to using presorted data, but without requiring the heavy initial presorting step itself. Instead, it handles dynamic, unpredictable workloads with no idle time and frequent updates. Auxiliary dynamic data structures, called cracker maps, provide a direct mapping between pairs of attributes used together in queries for tuple reconstruction. A map is continuously physically reorganized as an integral part of query evaluation, providing faster and reduced data access for future queries. To enable flexible and self-organizing behavior in storage-limited environments, maps are materialized only partially as demanded by the workload. Each map is a collection of separate chunks that are individually reorganized, dropped or recreated as needed. We implemented partial sideways cracking in an open-source column-store. A detailed experimental analysis demonstrates that it brings significant performance benefits for multi-attribute queries.}, author = {Stratos Idreos and Martin L. Kersten and Stefan Manegold} } @conference {DBLP:conf/edbt/IdreosLK08, title = {Continuous multi-way joins over distributed hash tables}, booktitle = {Proceedings of the 11th International Conference on Extending Database Technology (EDBT)}, year = {2008}, pages = {594-605}, address = {Nantes, France}, abstract = {This paper studies the problem of evaluating continuous multi-way joins on top of Distributed Hash Tables (DHTs). 
We present a novel algorithm, called recursive join (RJoin), that takes into account various parameters crucial in a distributed setting i.e., network traffic, query processing load distribution, storage load distribution etc. The key idea of RJoin is incremental evaluation: as relevant tuples arrive continuously, a given multi-way join is rewritten continuously into a join with fewer join operators, and is assigned continuously to different nodes of the network. In this way, RJoin distributes the responsibility of evaluating a continuous multi-way join to many network nodes by assigning parts of the evaluation of each binary join to a different node depending on the values of the join attributes. The actual nodes to be involved are decided by RJoin dynamically after taking into account the rate of incoming tuples with values equal to the values of the joined attributes. RJoin also supports sliding window joins which is a crucial feature, especially for long join paths, since it provides a mechanism to reduce the query processing state and thus keep the cost of handling incoming tuples stable. In addition, RJoin is able to handle message delays due to heavy network traffic. We present a detailed mathematical and experimental analysis of RJoin and study the performance tradeoffs that occur.}, author = {Stratos Idreos and Erietta Liarou and Manolis Koubarakis} } @conference {DBLP:conf/semweb/LiarouIK07, title = {Continuous RDF Query Processing over DHTs}, booktitle = {Proceedings of the 6th International Semantic Web Conference (ISWC)}, year = {2007}, pages = {324-339}, address = {Busan, Korea}, abstract = {We study the continuous evaluation of conjunctive triple pattern queries over RDF data stored in distributed hash tables. In a continuous query scenario network nodes subscribe with long-standing queries and receive answers whenever RDF triples satisfying their queries are published. We present two novel query processing algorithms for this scenario and analyze their properties formally. Our performance goal is to have algorithms that scale to large amounts of RDF data, distribute the storage and query processing load evenly and incur as little network traffic as possible. We discuss the various performance tradeoffs that occur through a detailed experimental evaluation of the proposed algorithms.}, author = {Erietta Liarou and Stratos Idreos and Manolis Koubarakis} } @conference {DBLP:conf/sigmod/IdreosKM07, title = {Updating a cracked database}, booktitle = {Proceedings of the 27th ACM SIGMOD International Conference on Management of Data}, year = {2007}, pages = {413-424}, address = {Beijing, China}, abstract = {A cracked database is a datastore continuously reorganized based on operations being executed. For each query, the data of interest is physically reclustered to speed-up future access to the same, overlapping or even disjoint data. This way, a cracking DBMS self-organizes and adapts itself to the workload. So far, cracking has been considered for static databases only. In this paper, we introduce several novel algorithms for high-volume insertions, deletions and updates against a cracked database. We show that the nice performance properties of a cracked database can be maintained in a dynamic environment where updates interleave with queries. Our algorithms comply with the cracking philosophy, i.e., a table is informed on pending insertions and deletions, but only when the relevant data is needed for query processing just enough pending update actions are applied. 
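
A minimal sketch of this lazy-update idea (our illustration, not the paper's algorithms; deletions and the per-piece bookkeeping of a real cracked column are omitted, and the class and method names are hypothetical): insertions are kept in a pending buffer and merged into the column only for the value range that a query actually touches.

    # Sketch of lazy updates against a cracked column: pending insertions are
    # applied only when, and only where, a query needs the affected values.

    class CrackedColumn:
        def __init__(self, values):
            self.values = list(values)
            self.pending_inserts = []

        def insert(self, v):
            self.pending_inserts.append(v)      # defer: no reorganization yet

        def range_query(self, low, high):
            # Merge just the pending insertions that fall in the queried range.
            touched = [v for v in self.pending_inserts if low <= v <= high]
            self.pending_inserts = [v for v in self.pending_inserts
                                    if not (low <= v <= high)]
            self.values.extend(touched)
            return [v for v in self.values if low <= v <= high]

    col = CrackedColumn([4, 9, 1])
    col.insert(6); col.insert(20)
    print(col.range_query(3, 10))   # merges only the pending value 6
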
We discuss details of our implementation in the context of an open-source DBMS and we show through a detailed experimental evaluation that our algorithms always manage to keep the cost of querying a cracked datastore with pending updates lower than the non-cracked case.}, author = {Stratos Idreos and Martin L. Kersten and Stefan Manegold} } @conference {DBLP:conf/cidr/IdreosKM07, title = {Database Cracking}, booktitle = {Proceedings of the 3rd International Conference on Innovative Data Systems Research (CIDR)}, year = {2007}, pages = {68-78}, address = {Asilomar, California}, abstract = {Database indices provide a non-discriminative navigational infrastructure to localize tuples of interest. Their maintenance cost is taken during database updates. In this paper, we study the complementary approach, addressing index maintenance as part of query processing using continuous physical reorganization, i.e., cracking the database into manageable pieces. The motivation is that by automatically organizing data the way users request it, we can achieve fast access and the much desired self-organized behavior. We present the first mature cracking architecture and report on our implementation of cracking in the context of a full fledged relational system. It led to a minor enhancement to its relational algebra kernel, such that cracking could be piggy-backed without incurring too much processing overhead. Furthermore, we illustrate the ripple effect of dynamic reorganization on the query plans derived by the SQL optimizer. The experiences and results obtained are indicative of a significant reduction in system complexity. We show that the resulting system is able to self-organize based on incoming requests with clear performance benefits. This behavior is visible even when the user focus is randomly shifting to different parts of the data.}, author = {Stratos Idreos and Martin L. Kersten and Stefan Manegold} } @inbook {136986, title = {Semantic Grid Resource Discovery using DHTs in Atlas}, booktitle = {Knowledge and Data Management in Grids}, year = {2006}, publisher = {Springer}, organization = {Springer}, abstract = {We study the problem of resource discovery in the Semantic Grid. We show how to solve this problem by utilizing Atlas, a P2P system for the distributed storage and retrieval of RDF(S) data. Atlas is currently under development in project OntoGrid funded by FP6. Atlas is built on top of the distributed hash table Bamboo and supports pull and push querying scenarios. It inherits all the nice features of Bamboo (openness, scalability, fault-tolerance, resistance to high churn rates) and extends Bamboo{\textquoteright}s protocols for storing and querying RDF(S) data. Atlas is being used currently to realize the metadata service of S-OGSA in a fully distributed and scalable way. In this paper, we concentrate on the main features of Atlas and demonstrate its use for Semantic Grid resource discovery in an OntoGrid use case scenario.}, author = {Zoi Kaoudi and Iris Miliaraki and Matoula Magiridou and Erietta Liarou and Stratos Idreos and Manolis Koubarakis} } @inbook {DBLP:books/daglib/p/ChiritaIKN06, title = {Designing Semantic Publish/Subscribe Networks Using Super-Peers}, booktitle = {Semantic Web and Peer-to-Peer}, year = {2006}, pages = {159-179}, abstract = { Publish/subscribe systems are an alternative to query-based systems in cases where the same information is asked for over and over, and where clients want to get updated answers for the same query over a period of time. 
Recent publish/subscribe systems such as P2P-DIET have introduced this paradigm in the P2P context. In this chapter we build on the experience gained with P2P-DIET and the Edutella super-peer infrastructure and present a semantic publish/subscribe system supporting metadata and a query language based on RDF. We define formally the basic concepts of our system and present detailed protocols for its operation.}, author = {Paul-Alexandru Chirita and Stratos Idreos and Manolis Koubarakis and Wolfgang Nejdl} } @conference {DBLP:conf/icde/IdreosTK06, title = {Distributed Evaluation of Continuous Equi-join Queries over Large Structured Overlay Networks}, booktitle = {Proceedings of the 22nd IEEE International Conference on Data Engineering (ICDE)}, year = {2006}, pages = {43}, address = {Atlanta, Georgia}, abstract = {We study the problem of continuous relational query processing in Internet-scale overlay networks realized by distributed hash tables. We concentrate on the case of continuous two-way equi-join queries. Joins are hard to evaluate in a distributed continuous query environment because data from more than one relation is needed, and this data is inserted in the network asynchronously. Each time a new tuple is inserted, the network nodes have to cooperate to check if this tuple can contribute to the satisfaction of a query when combined with previously inserted tuples. We propose a series of algorithms that initially index queries at network nodes using hashing. Then, they exploit the values of join attributes in incoming tuples to rewrite the given queries into simpler ones, and reindex them in the network where they might be satisfied by existing or future tuples. We present a detailed experimental evaluation in a simulated environment and we show that our algorithms are scalable, balance the storage and query processing load and keep the network traffic low.}, author = {Stratos Idreos and Christos Tryfonopoulos and Manolis Koubarakis} } @conference {DBLP:conf/semweb/LiarouIK06, title = {Evaluating Conjunctive Triple Pattern Queries over Large Structured Overlay Networks}, booktitle = {Proceedings of the 5th International Semantic Web Conference (ISWC)}, year = {2006}, pages = {399-413}, address = {Athens, Georgia}, abstract = {We study the problem of evaluating conjunctive queries composed of triple patterns over RDF data stored in distributed hash tables. Our goal is to develop algorithms that scale to large amounts of RDF data, distribute the query processing load evenly and incur little network traffic. We present and evaluate two novel query processing algorithms with these possibly conflicting goals in mind. We discuss the various tradeoffs that occur in our setting through a detailed experimental evaluation of the proposed algorithms.}, author = {Erietta Liarou and Stratos Idreos and Manolis Koubarakis} } @mastersthesis {136976, title = {Distributed Evaluation of Continuous Equi-join Queries over Large Structured Overlay Networks}, year = {2005}, type = {Master Thesis}, author = {Stratos Idreos} } @conference {DBLP:conf/ercimdl/TryfonopoulosIK05, title = {LibraRing: An Architecture for Distributed Digital Libraries Based on DHTs}, booktitle = {Proceedings of the 9th European Conference on Research and Advanced Technology for Digital Libraries (ECDL)}, year = {2005}, pages = {25-36}, address = {Vienna, Austria}, abstract = {We present a digital library architecture based on distributed hash tables.
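
The value-based rendezvous that underlies the continuous equi-join algorithms above, and DHT-based designs such as LibraRing more generally, can be sketched as follows (a simplified illustration under our own naming, not code from either paper; the real algorithms rewrite and reindex queries incrementally across nodes): queries and tuples that share a join-attribute value hash to the same node, which can then match them locally.

    # Sketch of value-based placement over a DHT: a query and the tuples that
    # can satisfy it meet at the node responsible for the shared value.

    import hashlib

    NUM_NODES = 8

    def node_for(value):
        """Map a join-attribute value to the DHT node responsible for it."""
        digest = hashlib.sha1(str(value).encode()).hexdigest()
        return int(digest, 16) % NUM_NODES

    # Node-local state: waiting (partially evaluated) queries and stored tuples.
    waiting_queries = {n: [] for n in range(NUM_NODES)}
    stored_tuples   = {n: [] for n in range(NUM_NODES)}

    def index_query(join_value, query_id):
        waiting_queries[node_for(join_value)].append(query_id)

    def publish_tuple(join_value, tup):
        n = node_for(join_value)
        stored_tuples[n].append(tup)
        # Any query already indexed here under the same value can now advance.
        return [(q, tup) for q in waiting_queries[n]]

    index_query("alice", "Q1: R join S on name")
    print(publish_tuple("alice", {"name": "alice", "age": 30}))
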
We discuss the main components of this architecture and the protocols for offering information retrieval and information filtering functionality. We present an experimental evaluation of our proposals.}, author = {Christos Tryfonopoulos and Stratos Idreos and Manolis Koubarakis} } @conference {DBLP:conf/sigir/TryfonopoulosIK05, title = {Publish/subscribe functionality in IR environments using structured overlay networks}, booktitle = {Proceedings of the 28th Annual International ACM SIGIR Conference}, year = {2005}, pages = {322-329}, address = {Salvador, Brazil}, abstract = {We study the problem of offering publish/subscribe functionality on top of structured overlay networks using data models and languages from IR. We show how to achieve this by extending the distributed hash table Chord and present a detailed experimental evaluation of our proposals.}, author = {Christos Tryfonopoulos and Stratos Idreos and Manolis Koubarakis} } @conference {DBLP:conf/dbisp2p/LiarouIK05, title = {Publish/Subscribe with RDF Data over Large Structured Overlay Networks}, booktitle = {Proceedings of the 3rd International Workshop on Databases, Information Systems and Peer-to-Peer Computing (DBISP2P)}, year = {2005}, pages = {135-146}, abstract = {We study the problem of evaluating RDF queries over structured overlay networks. We consider the publish/subscribe scenario where nodes subscribe with long-standing queries and receive notifications whenever triples matching their queries are inserted in the network. In this paper we focus on conjunctive multi-predicate queries. We demonstrate that these queries are useful in various modern applications, e.g., distributed digital libraries or Grid resource discovery. Conjunctive multi-predicate queries are hard to answer since multiple triples are necessary for their evaluation, and these triples will usually be inserted in the network asynchronously. We present and evaluate query processing algorithms that are scalable and distribute the query processing load evenly.}, author = {Erietta Liarou and Stratos Idreos and Manolis Koubarakis} } @conference {DBLP:conf/setn/IdreosK04, title = {P2P-DIET: Ad-hoc and Continuous Queries in Peer-to-Peer Networks Using Mobile Agents}, booktitle = {Proceedings of the 3rd Hellenic Conference on Artificial Intelligence (SETN)}, year = {2004}, pages = {23-32}, address = {Samos, Greece}, abstract = {This paper presents P2P-DIET, a resource sharing system that unifies ad-hoc and continuous query processing in super-peer networks using mobile agents. P2P-DIET offers a simple data model for the description of network resources based on attributes with values of type text. It also utilizes very efficient query processing algorithms based on indexing of resource metadata and queries. The capability of location-independent addressing is supported, which enables P2P-DIET clients to connect from anywhere in the network and use dynamic IP addresses. The features of stored notifications and rendezvous guarantee that all important information is delivered to interested clients even if they have been disconnected for some time.
P2P-DIET has been developed on top of the Open Source mobile agent system DIET Agents and is currently being demonstrated as a file-sharing application.}, author = {Stratos Idreos and Manolis Koubarakis} } @conference {DBLP:conf/sigmod/IdreosKT04, title = {P2P-DIET: An Extensible P2P Service that Unifies Ad-hoc and Continuous Querying in Super-Peer Networks}, booktitle = {Proceedings of the ACM SIGMOD International Conference on Management of Data}, year = {2004}, pages = {933-934}, address = {Paris, France}, author = {Stratos Idreos and Manolis Koubarakis and Christos Tryfonopoulos} } @conference {DBLP:conf/esws/ChiritaIKN04, title = {Publish/Subscribe for RDF-based P2P Networks}, booktitle = {Proceedings of the 1st European Semantic Web Conference (ESWC)}, year = {2004}, pages = {182-197}, address = {Heraklion, Greece}, abstract = {Publish/subscribe systems are an alternative to query-based systems in cases where the same information is asked for over and over, and where clients want to get updated answers for the same query over a period of time. Recent publish/subscribe systems such as P2P-DIET have introduced this paradigm in the P2P context. In this paper we build on the experience gained with P2P-DIET and the Edutella P2P infrastructure and present the first implementation of a P2P publish/subscribe system supporting metadata and a query language based on RDF. We define formally the basic concepts of our system and present detailed protocols for its operation. Our work utilizes the latest ideas in query processing for RDF data, P2P indexing, and routing research.}, author = {Paul-Alexandru Chirita and Stratos Idreos and Manolis Koubarakis and Wolfgang Nejdl} } @conference {DBLP:conf/edbt/IdreosKT04, title = {P2P-DIET: One-Time and Continuous Queries in Super-Peer Networks}, booktitle = {Proceedings of the 9th International Conference on Extending Database Technology (EDBT)}, year = {2004}, pages = {851-853}, author = {Stratos Idreos and Manolis Koubarakis and Christos Tryfonopoulos} } @conference {DBLP:conf/edbtw/IdreosTKD04, title = {Query Processing in Super-Peer Networks with Languages Based on Information Retrieval: The P2P-DIET Approach}, booktitle = {Proceedings of the 1st Peer-to-peer Computing and Databases Workshop (P2P-DB)}, year = {2004}, pages = {496-505}, address = {Heraklion, Greece}, abstract = {This paper presents P2P-DIET, an implemented resource sharing system that unifies one-time and continuous query processing in super-peer networks. P2P-DIET offers a simple data model for the description of network resources based on attributes with values of type text and a query language based on concepts from Information Retrieval. The focus of this paper is on the main modelling concepts of P2P-DIET (metadata, advertisements and queries), the routing algorithms (inspired by the publish/subscribe system SIENA) and the scalable indexing of resource metadata and queries.}, author = {Stratos Idreos and Christos Tryfonopoulos and Manolis Koubarakis and Yannis Drougas} } @mastersthesis {136981, title = {P2P-DIET: A query and notification service based on mobile agents for rapid implementation of peer-to-peer applications}, year = {2003}, type = {Diploma Thesis}, author = {Stratos Idreos} } @article {DBLP:journals/sigmod/KoubarakisTID03, title = {Selective information dissemination in P2P networks: problems and solutions}, journal = {ACM SIGMOD Record, Special issue on Peer-to-Peer Data Management}, volume = {32}, number = {3}, year = {2003}, pages = {71-76}, abstract = {We study the
problem of selective dissemination of information in P2P networks. We present our work on data models and languages for textual information dissemination and discuss a related P2P architecture that motivates our efforts. We also survey our results on the computational complexity of three related algorithmic problems (query satisfiability, entailment and filtering) and present efficient algorithms for the most crucial of these problems (filtering). Finally, we discuss the features of P2P-DIET, a super-peer system we have implemented at the Technical University of Crete, that realizes our vision and is able to support both ad-hoc querying and selective information dissemination scenarios in a P2P framework.}, author = {Manolis Koubarakis and Christos Tryfonopoulos and Stratos Idreos and Yannis Drougas} }