@book{abiteboul:2000a, author = "Serge Abiteboul et al", title = "Data on the Web: From Relations to Semistructured Data and XML", isbn = "1-55860-622-X", publisher "Morgan Kaufmann", year = "2000", annote = "Describes fundamental issues regarding making data available on the WWW: Data models, query languages and schemas (types). Of special relevance to our purposes are the treatment of (unstructured) data syntax, XML and typing of such data." } @article{balasubramaniam:1998a, author = "S. Balasubramaniam and Benjamin C. Pierce", title = "What is a File Synchronizer?", journal = "ACM SIG{\-}PLAN Notices", volume = "35", number = "6", pages = "26--36", month = April, year = "1998", coden = "SINODQ", ISSN = "0362-1340", bibdate = "Tue Nov 7 17:22:50 MST 2000", acknowledgement = ack-nhfb, annote = "This paper defines a framework for describing a file synchronizer, from reasoning about states of file system before and after synchronization. The focus is on synchronizing files, but CVS gets mentioned along with examples. It splits up the file synchronizing task in two distinct phases; update detection and reconciliation. It states the goal of a file synchronizer to be 'detect conflicting updates and propagate nonconflicting updates.'. The paper describes a 3-way merger with basis document O and replica A and B. The task of the update detector is to generate predicates dirty_A and dirty_B which tells if paths in file systems A and B conflicts with the source O. The second phase reconciliation, uses the source O and predicates dirty_A and dirty_B to generate two new states of the file systems A and B, A' and B'. If A' equals B' there is no conflicting updates. For the update detection phase the paper focuses on using the modtime and inode number of the file to generate the predicates. They state the goal of the reconcier to be '(1) propagate all non-conflicting updates and (2) if updates conflict, do nothing'. There is no focus on conflict resolution. The goal gets translated into a set of conditions between the start- and end states of the file systems. For the reconciliation phase an algorithm is described for which uniqueness and soundness is proved." } @article{ramsey:2001a, author = "Norman Ramsey and Elod Csirmaz", title = "An Algebraic Approach to File Synchronizing", month = "May", year = "2001", annote = "This paper defines a framework for describing a file synchronizer, from reasonong about operations preformed on each replica. The framework includes an algebra of file system operations. The focus is on files. The task is devided in three steps (1) Update detection, (2) Reconciliation and (3) Conflict resolution. From analysis of possible commands avalible on a file system, a list of operations for the algebra is developed. Semantics for the operations is defined, and soundness and completeness of the algebra is proved. The update detection step generates an edit script of operations from the algebra from the basis state and the current state of the file system. The lenght of this list of operations is minimized. The reconciliation step uses the slogans '(1) propagate all non-conflicting operations and (2) save conflicting operations for later resulotion.'. The reconcilator works on operations found by the update detector. The reconciliator determens if the lists of commands conflicts by checking if three criterias are met. It is possible to handle conflict resolution isolated and different strategies is discussed." } @masterthesis{lindholm:2001a, author = "Tancred Lindholm", title = "A 3-way Merging Algorithm for Synchronizing Ordered Trees - the 3DM merging and differencing tool for XML", month = "September", year = "2001", annote = "This master thesis is one of the latest large works on tree merging avalible. Similar to our thesis, trees is represented as XML. It is case driven - it presents different use cases and suggests reasonable merge results for each case. The use cases is syntetic, but very careful described. It descrebes a heuristic algorithm for matching trees, but the main contribution is the merging algorithm. The merging algorithm uses a pair of cursurs to keep track of where subtrees have been moved to. The algorithm is complex, it is a collection of small procedures and there is no short overall description. Much effort is put into analysing time complexity of the algorithm - it is proved that it is faster than exsisting O(nē) algorithms. While evaluating the produced algorithm, only the syntetic use cases is used for testing, which might give a twisted picture on how 3DM preforms on general XML trees. This master thesis is very important for our work since the reasearch hypothesis is that it is possible to make a universal tool for merging trees without taking into comsideration the semantics of the data, which conflicts with our research thesis." } @inproceedings{munson:1994a, author = "Jonathan P. Munson and Prasun Dewan", title = "A Flexible Object Merging Framework", booktitle = "Computer Supported Cooperative Work", pages = "231-242", year = "1994", url = "citeseer.nj.nec.com/munson94flexible.html", annote = "This paper describes merging of objects. It defines consolidation merge and reconciliation merge. It comes with a list of high level requirements of merge tools which includes, automatic differencing, interactive merging, conflict detection based on the structure of the object, semantics determined merges according to a merge policy provided by the user. Small examples in the text show contexts where semantics is needed of the merge tool to be able to produce reasonable results." } @misc{asklund:1994a, author = "U. Asklund", title = "Identifying conflicts during structural merge", text = "U. Asklund, Identifying conflicts during structural merge, Proceedings of NWPER'94, Nordic Workshop on Programming Environment Research, Lund, Sweden (June 1994).", year = "1994", url = "citeseer.nj.nec.com/asklund94identifying.html", annote = "Provides a model for solving problems that arises from optimistic concurency control. Looks on hierarchlcally structured documents. Solves the problem by keeping a edit history of the document. Gives the user the option to set simple rules for how documents should be merged." } @inproceedings{chawathe:1997b, author = {Sudarshan S. Chawathe and Hector Garcia-Molina}, title = {Meaningful change detection in structured data}, booktitle = {Proceedings of the 1997 ACM SIGMOD international conference on Management of data}, year = {1997}, isbn = {0-89791-911-4}, pages = {26--37}, location = {Tucson, Arizona, United States}, doi = {http://doi.acm.org/10.1145/253260.253266}, publisher = {ACM Press}, annote = "Adds move, copy and glue operations to the basic edit operations of chawathe:1996a. These operations work on entire sub-trees and the authors argues that they enable them to represent changes in a (for the users) more meaningful way. Change detection of of subtrees is NP-hard so a heuristic algorithm is delveloped. Gives detailed treatment of construction of edit scripts, matching algorithm and complexity analysis." } @inproceedings{chawathe:1996a, author = {Sudarshan S. Chawathe and Anand Rajaraman and Hector Garcia-Molina and Jennifer Widom}, title = {Change detection in hierarchically structured information}, booktitle = {Proceedings of the 1996 ACM SIGMOD international conference on Management of data}, year = {1996}, isbn = {0-89791-794-4}, pages = {493--504}, location = {Montreal, Quebec, Canada}, doi = {http://doi.acm.org/10.1145/233269.233366}, publisher = {ACM Press}, annote = "Discusses detection and representation of changes in hierarchically structured information. Very relevant for our purposes. The work has the folowing characteristics: Nested information, no object identifiers, comparison of replicas, high performance; which alle are in line with our needs. Presents an ordered tree data structure, generic edit operations, the notion of a (minimal conforming) edit script, and a cost model for edit scripts. Additionally algorithms for detection and matching are presented." } @inproceedings{zhang:1995a, author = "K. Zhang and J. T. L. Wang and D. Shasha", title = "On the editing distance between undirected acyclic graphs and related problems", booktitle = "Proceedings of the 6th Annual Symposium on Combinatorial Pattern Matching", number = "937", publisher = "Springer-Verlag, Berlin", address = "Espoo, Finland", editor = "Z. Galil and E. Ukkonen", pages = "395--407", year = "1995", url = "citeseer.nj.nec.com/zhang95editing.html" , annote = "Defines the distance between two connected undirected acyclic graphs as being the number of edit operations needed to transform one to the other. For our purpose the important thing in this paper is that they prove that it is a NP-hard to calculate the distance between two such trees therefore proberbly also NP-hard to merge them (since most merge tools uses edit scripts)." } @inproceedings{wallace:1999a, author = {Malcolm Wallace and Colin Runciman}, title = {Haskell and XML: generic combinators or type-based translation?}, booktitle = {Proceedings of the fourth ACM SIGPLAN international conference on Functional programming}, year = {1999}, isbn = {1-58113-111-9}, pages = {148--159}, location = {Paris, France}, doi = {http://doi.acm.org/10.1145/317636.317794}, publisher = {ACM Press}, annote = {Presents two complementary approaches to writing XML document processing applications in functional languages (generic combinators or type based translation). The generic combinartors approach is very relevant for our approaches as we plan to construct operators on graphs in an applicative language (SML). Combinators for selection, generation and transformation are desribed in detail and their algebraic laws are stated. This is very close to our intenetions.} } @inproceedings{consel:1998a, author = {Consel, C. and Marlet, R.}, title = {Architecturing software using a methodology for language development}, booktitle = {Proceedings of the 10th International Symposium on Programming Language Implementation and Logic Programming}, editor = {Palamidessi, C. and Glaser, H. and Meinke, K.}, month = SEP, year = 1998, address = {Pisa, Italy}, series = {Lecture Notes in Computer Science}, volume = 1490, pages = {170--194}, keyword = {DSL}, annote = {(highly relevant) Presents and elaborates on a methodology for developing DSLs. Their methodolgy is taken from GPL development, but they argue that their framework can express software architecture concerns as well. From a programming language perspective, DSLs are claimed to be more attractive than GPLs for many applications since they provide for easier programming, systematic reuse of common operations, and easier verification. From a software architecture perspective, DSLs can be regarded as a parameterization mechanism, and an interface to a program library. The rest of the paper discuss each phase of the proposed methodology and provides examples as well. The phases are: Language analysis, interface definitions, staged semantics, formal definition, abstract machine, implementation, partial evaluation.} } @phdthesis{thibault:1998a, author = "Scott Thibault", title = "Domain-Specific Languages: Conception, Implementation, and Application", month = "October", year = "1998", annote = {Describes the development of two DSLs as well as a DSL design methodology. The key concepts in this methodology are domain analysis (identification of reusable entities, abstraction or generalization, and systematic reuse), family/commonality analysis (terminology, identification of commonalities, and variabilities). It is argued, that DSL benefits are obtained through familiar program notation, design reuse, high-level abstraction, and program analysis. The latter is due to restricted expressiveness and makes automatic program verification a realistic goal. DSLs are argued to be superior to GPLs in many scenarios due to domain specific abstraction mechanisms, a "rich" execution model, and (possibly) )controlled access to storage.} } @article{deursen:2000a, author = {Arie {van Deursen} and Paul Klint and Joost Visser}, title = {Domain-specific languages: an annotated bibliography}, journal = {SIGPLAN Not.}, volume = {35}, number = {6}, year = {2000}, issn = {0362-1340}, pages = {26--36}, doi = {http://doi.acm.org/10.1145/352029.352035}, publisher = {ACM Press}, annote = {Provides a bibliography of 75 publications on DSLs as well as a rather brief treatment of terminology, risks and benefits, design methodologies, and implementation techniques. Gives three solution types for solving problems in well defined problem domains: Subroutine libraries, object oriented frameworks and component frameworks, DSLs. The authors propose the following definition of a DSL: A DSL is programming language or executable specification language that offers, through appropriate notatins and abstractions, expressive power focused on, and usually restricted to, a particular problem domain. Argues that DSLs are usually declarative and therefore they can be regarded as specification languages as well. Very briefly touches upon an analysis model for developing DSLs, but does not add anything of significance to consel:1998a in this area.} } @inproceedings{buneman:1997a, author = "Peter Buneman and Susan B. Davidson and Mary F. Fernandez and Dan Suciu", title = "Adding Structure to Unstructured Data", booktitle = "Database Theory---{ICDT}'97, 6th International Conference", volume = "1186", month = "8--10~", publisher = "Springer", address = "Delphi, Greece", editor = "Foto N. Afrati and Phokion Kolaitis", pages = "336--350", year = "1997", url = "citeseer.nj.nec.com/buneman97adding.html", annote = "Adding structure to unstructered data Develops a schema for unstructered data. Gives formal definition of graph databases and graph schemas. Provides formal treatment of queries against unstructered data. Asesses expressiveness of proposed graph schemas. Has theoretical bias. Could be very relevant as a theretical framework for thourough analysis of interplay between relational (structured) data and xml." } @inproceedings{buneman:1995, author = "Peter Buneman and Susan B. Davidson and Dan Suciu", title = "Programming Constructs for Unstructured Data", booktitle = "Workshop on Database Programming Languages", pages = "12", year = "1995", url = "citeseer.nj.nec.com/buneman95programming.html", annote = "Discusses languages for querying unstructured data. Introduces a labeled tree data model and investigates programming constructs for working on that strucure. The authors argue that such a data structure is advantagous for querying unstructured - and semistructured data. Examples of functional programming constructs on the tree data type is presented and a natural operational semntics is given. This is very similar what we intend to accomplish and we can use this as a template for the prototyping of our own operators." } @misc{chawathe:1997a, author = "S. Chawathe and H. Garcia-Molina", title = "An expressive model for comparing tree-structured data", text = "S. Chawathe and H. Garcia-Molina. An expressive model for comparing tree-structured data. Technical report, Stanford University Database Group, 1997. Available at http://www-db.stanford.edu/.", year = "1997", url = "citeseer.nj.nec.com/chawathe97expressive.html", annote = "Studies comparison of tree structured data. Focuses on rooted, unordered labeled trees. Criticises linear edit scripts for being unintuitive. Proposes 'transformations' instead, a form of 'parallel' edit scripts. implements copy, move, and glue (all subtree operations) in terms of these concepts. The developed algorithm works on representative signatures. These signatures can be used to infer optimal transformations (in terms of sequences of operations). Has an appendix that provides extensive treatment and proofs of edge coverings, matching and various tree algorithm related subjects." } @misc{mogin:2003, author = "Pavle Mogin", title = "XML Storage Techniques", howpublished = "Lecture notes", year = "2003", annote = "Provides an overview of XML storage techniques and - issues. Defines the notions of document- and data centric XML documents." } @misc{sheard:1998a, author = "T. Sheard and Z. Benaissa", title = "From Interpreter to Compiler using Staging and Monads", text = "Pacific Software Research Center, Oregon Graduate Institute.", year = "1998", url = "citeseer.nj.nec.com/sheard98from.html" } @inproceedings{sheard:1999a, author = {Tim Sheard and Zine-el-abidine Benaissa and Emir Pasalic}, title = {DSL implementation using staging and monads}, booktitle = {Proceedings of the 2nd conference on Domain-specific languages}, year = {1999}, isbn = {1-58113-255-7}, pages = {81--94}, location = {Austin, Texas, United States}, doi = {http://doi.acm.org/10.1145/331960.331975}, publisher = {ACM Press} }