include/llvm/Support/YAMLParser.h - Issue 5604054: YAML Parser

Unified Diff: include/llvm/Support/YAMLParser.h

Issue 5604054: YAML Parser

Patch Set: Update Created 13 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.

Jump to:

View side-by-side diff with in-line comments

Index: include/llvm/Support/YAMLParser.h

diff --git a/include/llvm/Support/YAMLParser.h b/include/llvm/Support/YAMLParser.h

new file mode 100644

index 0000000000000000000000000000000000000000..1b77f896af8adcd0fcaae10489a23f99fd6deb61

--- /dev/null

+++ b/include/llvm/Support/YAMLParser.h

@@ -0,0 +1,498 @@

+//===--- YAMLParser.h - Simple YAML parser --------------------------------===//

+//

+// The LLVM Compiler Infrastructure

+//

+// This file is distributed under the University of Illinois Open Source

+// License. See LICENSE.TXT for details.

+//

+//===----------------------------------------------------------------------===//

+//

+// This is a YAML 1.2 parser.

+//

+// See http://www.yaml.org/spec/1.2/spec.html for the full standard.

+//

+// This currently does not implement the following:

+// * Multi-line literal folding.

+// * Tag resolution.

+// * UTF-16.

+// * BOMs anywhere other than the first Unicode scalar value in the file.

+//

+// The most important class here is Stream. This represents a YAML stream with

+// 0, 1, or many documents.

+//

+// SourceMgr sm;

+// StringRef input = getInput();

+// yaml::Stream stream(input, sm);

+//

+// for (yaml::document_iterator di = stream.begin(), de = stream.end();

+// di != de; ++di) {

+// yaml::Node *n = di->getRoot();

+// if (n) {

+// // Do something with n...

+// } else

+// break;

+// }

+//

+//===----------------------------------------------------------------------===//

+#ifndef LLVM_SUPPORT_YAML_PARSER_H

+#define LLVM_SUPPORT_YAML_PARSER_H

+#include "llvm/ADT/OwningPtr.h"

+#include "llvm/ADT/StringRef.h"

+#include "llvm/Support/Allocator.h"

+#include "llvm/Support/Casting.h"

+#include <cassert>

+#include <iterator>

+#include <string>

+#include <utility>

+namespace llvm {

+class MemoryBuffer;

+class SourceMgr;

+class raw_ostream;

+class Twine;

+namespace yaml {

+class document_iterator;

+class Document;

+class Scanner;

+struct Token;

+/// @brief Dump all the tokens in this stream to OS.

+/// @returns true if there was an error, false otherwise.

+bool dumpTokens(StringRef Input, raw_ostream &);

+/// @brief Scans all tokens in input without outputting anything. This is used

+/// for benchmarking the tokenizer.

+/// @returns true if there was an error, false otherwise.

+bool scanTokens(StringRef Input);

+/// @brief Escape \a Input for a double quoted scalar.

+std::string escape(StringRef Input);

klimek 2012/02/16 15:52:35 This is not called anywhere yet(?) Why do we need

Bigcheesegs 2012/02/16 22:32:42 It's used when writing out YAML files.

+/// @brief This class represents a YAML stream potentially containing multiple

+/// documents.

+///

+/// Typical use (see the example in the file header) is to construct a Stream

+/// from the input text and a SourceMgr and walk it with begin()/end().

+class Stream {

+public:

+ /// @brief Construct a stream over \a Input.

+ /// NOTE(review): the SourceMgr is presumably used for reporting diagnostics;

+ /// confirm against YAMLParser.cpp.

+ Stream(StringRef Input, SourceMgr &);

+ ~Stream();

+ /// Iterators delimiting the documents of this stream, used as in the

+ /// file-header example.

+ document_iterator begin();

+ document_iterator end();

+ /// NOTE(review): presumably consumes the remainder of the stream; defined in

+ /// YAMLParser.cpp, confirm there.

+ void skip();

+ /// NOTE(review): presumably true once an error has been recorded -- confirm.

+ bool failed();

+ /// @brief Convenience check: calls skip() and then returns !failed().

+ bool validate() {

+ skip();

+ return !failed();

+ }

+private:

+ /// Owning pointer to the Scanner, which is only forward declared here.

+ OwningPtr<Scanner> scanner;

klimek 2012/02/16 15:52:35 I would strongly argue for consistency and rather

+ /// NOTE(review): presumably the document currently being iterated; verify.

+ Document *CurrentDoc;

+ friend class Document;

+ /// @brief Validate a %YAML x.x directive.

+ void handleYAMLDirective(const Token &);

+};

+/// @brief Abstract base class for all Nodes.

+///

+/// A node stores the kind it was constructed with (queried through getType()

+/// by the classof() predicates of the derived classes), the Document it

+/// belongs to, and an optional anchor.

+class Node {

+public:

+  /// @brief Discriminator for the concrete node classes in this file.

+  enum NodeKind {

+    NK_Null,

+    NK_Scalar,

+    NK_KeyValue,

+    NK_Mapping,

+    NK_Sequence,

+    NK_Alias

+  };

+  Node(unsigned int Type, Document *, StringRef Anchor);

+  virtual ~Node();

+  /// @brief Get the value of the anchor attached to this node. If it does not

+  /// have one, getAnchor().size() will be 0.

+  StringRef getAnchor() const { return Anchor; }

+  // These functions forward to Document and Scanner.

+  Token &peekNext();

+  Token getNext();

+  Node *parseBlockNode();

+  BumpPtrAllocator &getAllocator();

+  void setError(const Twine &Message, Token &Location) const;

+  bool failed() const;

+  /// @brief Skip over this node. The base implementation does nothing;

+  /// KeyValueNode, MappingNode and SequenceNode override it.

+  virtual void skip() {}

+  /// @brief The kind value stored at construction; compared against NodeKind

+  /// values by the classof() implementations.

+  unsigned int getType() const { return TypeID; }

+  /// LLVM-style RTTI hook: every Node is a Node.

+  static inline bool classof(const Node *) { return true; }

+protected:

+  /// The document this node was created for.

+  Document *Doc;

+private:

+  unsigned int TypeID;

+  StringRef Anchor;

+};

+/// @brief Node for a YAML null value. It is always constructed with an empty

+/// anchor.

+class NullNode : public Node {

+public:

+  NullNode(Document *D) : Node(NK_Null, D, StringRef()) {}

+  // LLVM-style RTTI hooks.

+  static inline bool classof(const NullNode *) { return true; }

+  static inline bool classof(const Node *Candidate) {

+    const bool IsNull = Candidate->getType() == NK_Null;

+    return IsNull;

+  }

+};

+/// @brief A scalar node is an opaque datum that can be presented as a

+/// series of zero or more Unicode scalar values.

+class ScalarNode : public Node {

+public:

+  ScalarNode(Document *D, StringRef Anchor, StringRef Val)

+      : Node(NK_Scalar, D, Anchor), Value(Val) {}

+  /// @brief Return the stored value untouched: no unescaping, no folding.

+  ///

+  /// These are the exact bytes contained in the file (after conversion to

+  /// utf8).

+  StringRef getRawValue() const { return Value; }

+  /// @brief Gets the value of this node as a StringRef.

+  ///

+  /// @param Storage is used to store the content of the returned StringRef iff

+  ///        it requires any modification from how it appeared in the source.

+  ///        This happens with escaped characters and multi-line literals.

+  StringRef getValue(SmallVectorImpl<char> &Storage) const;

+  // LLVM-style RTTI hooks.

+  static inline bool classof(const ScalarNode *) { return true; }

+  static inline bool classof(const Node *Candidate) {

+    const bool IsScalar = Candidate->getType() == NK_Scalar;

+    return IsScalar;

+  }

+private:

+  StringRef Value;

+};

+/// @brief A key and value pair. While not technically a Node under the YAML

+/// representation graph, it is easier to treat them this way.

+///

+/// TODO: Consider making this not a child of Node.

+class KeyValueNode : public Node {

+public:

+  KeyValueNode(Document *D)

+      : Node(NK_KeyValue, D, StringRef()), Key(0), Value(0) {}

+  /// @brief Parse and return the key.

+  ///

+  /// This may be called multiple times.

+  ///

+  /// @returns The key, or null if failed() == true.

+  Node *getKey();

+  /// @brief Parse and return the value.

+  ///

+  /// This may be called multiple times.

+  ///

+  /// @returns The value, or null if failed() == true.

+  Node *getValue();

+  /// @brief Skip the key and then the value.

+  ///

+  /// getKey() and getValue() return null once parsing has failed, so each

+  /// result is checked before use; the value is only looked at when a key

+  /// was obtained.

+  virtual void skip() {

+    if (Node *K = getKey()) {

+      K->skip();

+      if (Node *V = getValue())

+        V->skip();

+    }

+  }

+  // LLVM-style RTTI hooks.

+  static inline bool classof(const KeyValueNode *) { return true; }

+  static inline bool classof(const Node *N) {

+    return N->getType() == NK_KeyValue;

+  }

+private:

+  Node *Key;

+  Node *Value;

+};

+/// @brief Forward iterator shared by the YAML collection nodes (sequences and

+/// maps).

+///

+/// BaseT must expose a ValueT* member named CurrentEntry and a member function

+/// increment(); increment() sets CurrentEntry to 0 when the collection is

+/// exhausted, which turns this iterator into an end iterator (null Base).

+template <class BaseT, class ValueT>

+class basic_collection_iterator

+    : public std::iterator<std::forward_iterator_tag, ValueT> {

+public:

+  /// Default construction yields an end iterator.

+  basic_collection_iterator() : Base(0) {}

+  basic_collection_iterator(BaseT *B) : Base(B) {}

+  ValueT *operator->() const {

+    assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");

+    return Base->CurrentEntry;

+  }

+  ValueT &operator*() const {

+    assert(Base && Base->CurrentEntry &&

+           "Attempted to dereference end iterator!");

+    return *Base->CurrentEntry;

+  }

+  /// Implicit conversion to the current entry; same checks as operator->().

+  operator ValueT *() const { return operator->(); }

+  bool operator!=(const basic_collection_iterator &Other) const {

+    const bool SameBase = Base == Other.Base;

+    if (!SameBase)

+      return true;

+    // Two end iterators (both null) are equal.

+    if (!Base)

+      return false;

+    return Base->CurrentEntry != Other.Base->CurrentEntry;

+  }

+  basic_collection_iterator &operator++() {

+    assert(Base && "Attempted to advance iterator past end!");

+    Base->increment();

+    // No entry left: become an end iterator.

+    if (!Base->CurrentEntry)

+      Base = 0;

+    return *this;

+  }

+private:

+  BaseT *Base;

+};

+// The following two templates are used for both MappingNode and SequenceNode.

+/// @brief Start iterating over \a C and return an iterator to its first entry.

+///

+/// Asserts that \a C has not been iterated before, clears C.IsAtBeginning and

+/// advances a fresh iterator once so that the first entry is loaded through

+/// C.increment(). If that leaves C.CurrentEntry null, the result is an end

+/// iterator.

+template <class CollectionType>

+typename CollectionType::iterator begin(CollectionType &C) {

+  assert(C.IsAtBeginning && "You may only iterate over a collection once!");

+  C.IsAtBeginning = false;

+  typename CollectionType::iterator ret(&C);

+  ++ret;

+  return ret;

+}

+/// @brief Skip every entry of \a C.

+///

+/// Only valid before iteration has started or after it has finished. When

+/// \a C is still at its beginning, each entry is visited and its skip() is

+/// called; otherwise nothing is done.

+template <class CollectionType>

+void skip(CollectionType &C) {

+  // TODO: support skipping from the middle of a parsed collection ;/

+  assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!");

+  if (C.IsAtBeginning)

+    for (typename CollectionType::iterator i = begin(C), e = C.end();

+         i != e; ++i)

+      i->skip();

+}

+/// @brief Represents a YAML map created from either a block map or a flow map.

+///

+/// This parses the YAML stream as increment() is called.

+class MappingNode : public Node {

+public:

+ enum MappingType {

+ MT_Block,

+ MT_Flow,

+ MT_Inline ///< An inline mapping node is used for "[key: value]".

+ };

+ MappingNode(Document *D, StringRef Anchor, MappingType MT)

+ : Node(NK_Mapping, D, Anchor)

+ , Type(MT)

+ , IsAtBeginning(true)

+ , IsAtEnd(false)

+ , CurrentEntry(0)

+ {}

+ // The iterator and the shared begin()/skip() templates access the private

+ // iteration state (CurrentEntry, IsAtBeginning, IsAtEnd, increment()).

+ friend class basic_collection_iterator<MappingNode, KeyValueNode>;

+ typedef basic_collection_iterator<MappingNode, KeyValueNode> iterator;

+ template <class T> friend typename T::iterator yaml::begin(T &);

+ template <class T> friend void yaml::skip(T &);

+ /// @brief Forwards to yaml::begin(), which asserts that iteration has not

+ /// been started before.

+ iterator begin() {

+ return yaml::begin(*this);

+ }

+ /// @brief A default-constructed iterator serves as the end iterator.

+ iterator end() { return iterator(); }

+ /// @brief Forwards to yaml::skip().

+ virtual void skip() {

+ yaml::skip(*this);

+ }

+ // LLVM-style RTTI hooks.

+ static inline bool classof(const MappingNode *) { return true; }

+ static inline bool classof(const Node *N) {

+ return N->getType() == NK_Mapping;

+ }

+private:

+ MappingType Type;

+ bool IsAtBeginning;

+ bool IsAtEnd;

+ /// Entry the iterator currently refers to; null before the first

+ /// increment().

+ KeyValueNode *CurrentEntry;

+ void increment();

+};

+/// @brief Represents a YAML sequence created from either a block sequence or a

+/// flow sequence.

+///

+/// This parses the YAML stream as increment() is called.

+class SequenceNode : public Node {

+public:

+ enum SequenceType {

+ ST_Block,

+ ST_Flow,

+ // Use for:

+ //

+ // key:

+ // - val1

+ // - val2

+ //

+ // As a BlockMappingEntry and BlockEnd are not created in this case.

+ ST_Indentless

+ };

+ SequenceNode(Document *D, StringRef Anchor, SequenceType ST)

+ : Node(NK_Sequence, D, Anchor)

+ , SeqType(ST)

+ , IsAtBeginning(true)

+ , IsAtEnd(false)

+ , WasPreviousTokenFlowEntry(true) // Start with an imaginary ','.

+ , CurrentEntry(0)

+ {}

+ // The iterator and the shared begin()/skip() templates access the private

+ // iteration state (CurrentEntry, IsAtBeginning, IsAtEnd).

+ friend class basic_collection_iterator<SequenceNode, Node>;

+ typedef basic_collection_iterator<SequenceNode, Node> iterator;

+ template <class T> friend typename T::iterator yaml::begin(T &);

+ template <class T> friend void yaml::skip(T &);

+ // NOTE(review): increment() is public here but private in MappingNode;

+ // confirm whether that difference is intended.

+ void increment();

+ /// @brief Forwards to yaml::begin(), which asserts that iteration has not

+ /// been started before.

+ iterator begin() {

+ return yaml::begin(*this);

+ }

+ /// @brief A default-constructed iterator serves as the end iterator.

+ iterator end() { return iterator(); }

+ /// @brief Forwards to yaml::skip().

+ virtual void skip() {

+ yaml::skip(*this);

+ }

+ // LLVM-style RTTI hooks.

+ static inline bool classof(const SequenceNode *) { return true; }

+ static inline bool classof(const Node *N) {

+ return N->getType() == NK_Sequence;

+ }

+private:

+ SequenceType SeqType;

+ bool IsAtBeginning;

+ bool IsAtEnd;

+ bool WasPreviousTokenFlowEntry;

+ /// Entry the iterator currently refers to; null before the first

+ /// increment().

+ Node *CurrentEntry;

+};

+/// @brief Represents an alias to a Node with an anchor.

+///

+/// The alias stores the name it refers to and is itself constructed with an

+/// empty anchor.

+class AliasNode : public Node {

+public:

+  AliasNode(Document *D, StringRef Val)

+      : Node(NK_Alias, D, StringRef()), Name(Val) {}

+  /// @brief The name this alias refers to, as passed to the constructor.

+  StringRef getName() const { return Name; }

+  /// NOTE(review): presumably resolves to the node anchored under getName();

+  /// defined outside this header -- confirm.

+  Node *getTarget();

+  // LLVM-style RTTI hooks. The same-type overload takes AliasNode, mirroring

+  // the other node classes in this file.

+  static inline bool classof(const AliasNode *) { return true; }

+  static inline bool classof(const Node *N) {

+    return N->getType() == NK_Alias;

+  }

+private:

+  StringRef Name;

+};

+/// @brief One document of a YAML Stream (a stream is a sequence of Documents).

+/// Each document contains a root node.

+class Document {

+public:

+  /// @brief Root for parsing a node. Returns a single node.

+  Node *parseBlockNode();

+  Document(Stream &ParentStream);

+  /// @brief Finish parsing the current document.

+  /// @returns true if there are more documents, false otherwise.

+  bool skip();

+  /// @brief Return the root level node, parsing it on first use.

+  ///

+  /// A non-null result is cached in Root and returned by later calls.

+  Node *getRoot() {

+    if (!Root)

+      Root = parseBlockNode();

+    return Root;

+  }

+private:

+  friend class Node;

+  friend class document_iterator;

+  /// @brief Stream to read tokens from.

+  Stream &stream;

+  /// @brief Allocator for this document's nodes. All of them are destroyed

+  /// without calling their destructor when the document is destroyed.

+  BumpPtrAllocator NodeAllocator;

+  /// @brief The root node. Used to support skipping a partially parsed

+  /// document.

+  Node *Root;

+  Token &peekNext();

+  Token getNext();

+  void setError(const Twine &Message, Token &Location) const;

+  bool failed() const;

+  /// @brief Placeholder for tag directive handling; \a Tag is ignored for now.

+  void handleTagDirective(const Token &Tag) {

+    // TODO: Track tags.

+  }

+  /// @brief Parse %BLAH directives and return true if any were encountered.

+  bool parseDirectives();

+  /// @brief Consume the next token and error if it is not \a TK.

+  bool expectToken(int TK);

+};

+/// @brief Iterator abstraction for Documents over a Stream.

+///

+/// The iterator holds a reference to a Document pointer rather than a pointer

+/// of its own; a default-constructed iterator refers to the static NullDoc

+/// and acts as the end iterator. Advancing deletes the current Document.

+class document_iterator {

+public:

+  document_iterator() : Doc(NullDoc) {}

+  document_iterator(Document *&D) : Doc(D) {}

+  /// @brief Two iterators differ when the Document pointers they refer to

+  /// differ. Const so that const iterators can be compared as well.

+  bool operator !=(const document_iterator &Other) const {

+    return Doc != Other.Doc;

+  }

+  /// @brief Finish the current document and move on to the next one.

+  ///

+  /// If Document::skip() reports that nothing follows, the current document is

+  /// deleted and the referenced pointer is set to null; otherwise it is

+  /// replaced by a new Document on the same Stream.

+  document_iterator operator ++() {

+    assert(Doc != 0 && "Attempted to advance document_iterator past the end!");

+    if (!Doc->skip()) {

+      delete Doc;

+      Doc = 0;

+    } else {

+      // Save the stream before the document that refers to it is deleted.

+      Stream &S = Doc->stream;

+      delete Doc;

+      Doc = new Document(S);

+    }

+    return *this;

+  }

+  Document *operator ->() {

+    return Doc;

+  }

+private:

+  /// Pointer that default-constructed (end) iterators refer to.

+  static Document *NullDoc;

+  Document *&Doc;

+};

+#endif

« no previous file with comments | « LICENSE.TXT ('k') | lib/Support/CMakeLists.txt » ('j') | lib/Support/YAMLParser.cpp » ('J')