From 25bc17e3f762e1cce60eff52a65e0cd6357fae71 Mon Sep 17 00:00:00 2001
From: Elliot Metsger <emetsger@jhu.edu>
Date: Wed, 9 Sep 2015 11:33:30 -0400
Subject: [PATCH 1/9] DC-2101: Initial BagIt module poms.

---
 dcs-bagit/dcs-bagit-support/pom.xml |  83 ++++++++++++++++++++
 dcs-bagit/pom.xml                   | 113 ++++++++++++++++++++++++++++
 2 files changed, 196 insertions(+)
 create mode 100644 dcs-bagit/dcs-bagit-support/pom.xml
 create mode 100644 dcs-bagit/pom.xml
diff --git a/dcs-bagit/dcs-bagit-support/pom.xml b/dcs-bagit/dcs-bagit-support/pom.xml
new file mode 100644
index 00000000..fc12c29b
--- /dev/null
+++ b/dcs-bagit/dcs-bagit-support/pom.xml
@@ -0,0 +1,83 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Copyright 2015 Johns Hopkins University
+  ~
+  ~ Licensed under the Apache License, Version 2.0 (the "License");
+  ~ you may not use this file except in compliance with the License.
+  ~ You may obtain a copy of the License at
+  ~
+  ~     http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <!-- ======================================================== -->
+  <!-- Data Conservancy BagIt Support  ======================== -->
+  <!-- ======================================================== -->
+  
+  <modelVersion>4.0.0</modelVersion>
+  
+  <name>Data Conservancy BagIt Support</name>
+  <description>Support classes for BagIt implementations and clients</description>
+  
+  <groupId>org.dataconservancy</groupId>
+  <artifactId>dcs-bagit-support</artifactId>
+  <packaging>jar</packaging>
+  
+  <parent>
+    <groupId>org.dataconservancy</groupId>
+    <artifactId>dcs-bagit</artifactId>
+    <version>1.0.0-SNAPSHOT</version>
+  </parent>
+
+  <!-- Build  ===================================================== -->
+
+  <build> </build>
+
+
+  <!-- Dependencies =============================================== -->
+
+  <dependencies>
+    
+    <!-- Provided ================================================= -->
+
+    <!-- Runtime ================================================== -->
+
+    <!-- Compile ================================================== -->
+
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <scope>compile</scope>
+    </dependency>
+
+    <!-- Test ===================================================== -->
+
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>log4j</groupId>
+      <artifactId>log4j</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+  </dependencies>
+
+</project>
diff --git a/dcs-bagit/pom.xml b/dcs-bagit/pom.xml
new file mode 100644
index 00000000..1f8d95bd
--- /dev/null
+++ b/dcs-bagit/pom.xml
@@ -0,0 +1,113 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Copyright 2015 Johns Hopkins University
+  ~
+  ~ Licensed under the Apache License, Version 2.0 (the "License");
+  ~ you may not use this file except in compliance with the License.
+  ~ You may obtain a copy of the License at
+  ~
+  ~     http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <!-- ======================================================== -->
+  <!-- Data Conservancy BagIt Packaging Parent POM ============ -->
+  <!-- ======================================================== -->
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <name>Data Conservancy BagIt Tools and Utilities</name>
+  <description>Data Conservancy BagIt packaging tools and utilities</description>
+
+  <groupId>org.dataconservancy</groupId>
+  <artifactId>dcs-bagit</artifactId>
+  <version>1.0.0-SNAPSHOT</version>
+  <packaging>pom</packaging>
+
+
+  <parent>
+    <groupId>org.dataconservancy</groupId>
+    <artifactId>project-pom</artifactId>
+    <version>1.1.2-SNAPSHOT</version>
+  </parent>
+
+  <!-- Modules  =================================================== -->
+
+  <modules>
+    <module>dcs-bagit-support</module>
+    <module>dcs-bagit-vfs</module>
+    <module>dcs-bagit-compress</module>
+  </modules>
+
+  <!-- Build  ===================================================== -->
+
+  <build> </build>
+
+  <!-- Dependency Management ====================================== -->
+
+  <dependencyManagement>
+
+    <dependencies>
+
+      <dependency>
+        <groupId>org.apache.commons</groupId>
+        <artifactId>commons-vfs2</artifactId>
+        <version>2.1-SNAPSHOT</version>
+      </dependency>
+      
+      <dependency>
+        <groupId>org.apache.commons</groupId>
+        <artifactId>commons-compress</artifactId>
+        <version>1.10</version>
+      </dependency>
+
+      <dependency>
+        <groupId>junit</groupId>
+        <artifactId>junit</artifactId>
+        <version>4.12</version>
+      </dependency>
+
+    </dependencies>
+  </dependencyManagement>
+
+  <!-- Dependencies =============================================== -->
+
+  <dependencies>
+
+    <!-- Provided ================================================= -->
+
+    <!-- Runtime ================================================== -->
+
+    <!-- Compile ================================================== -->
+
+    <!-- Test ===================================================== -->
+
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>log4j</groupId>
+      <artifactId>log4j</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+  </dependencies>
+
+</project>

From bf9a1e4d73b9106aeea7cf0cbb5de64529537e83 Mon Sep 17 00:00:00 2001
From: Elliot Metsger <emetsger@jhu.edu>
Date: Wed, 9 Sep 2015 11:34:08 -0400
Subject: [PATCH 2/9] DC-2101: Initial BagUri class.

---
 .../dataconservancy/bagit/support/BagUri.java | 174 ++++++++++++++++++
 1 file changed, 174 insertions(+)
 create mode 100644 dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/support/BagUri.java

diff --git a/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/support/BagUri.java b/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/support/BagUri.java
new file mode 100644
index 00000000..407cb1fb
--- /dev/null
+++ b/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/support/BagUri.java
@@ -0,0 +1,174 @@
+/*
+ * Copyright 2015 Johns Hopkins University
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.dataconservancy.bagit.support;
+
+import java.net.URI;
+import java.net.URISyntaxException;
+
+/**
+ * URI scheme for addressing resources contained within a Bag.  The form of a Bag URI is:
+ * {@code bag://<bag-name>/path/to/resource#optional-fragment}
+ * <p>
+ * The Bag URI {@link #BAG_SCHEME scheme} is equal to the string '{@code bag}'; resources inside the bag are unique
+ * within the scope of a single bag. The {@code authority} component of a Bag URI is equal to the name of the Bag
+ * serialization (as discussed in <a href="https://www.ietf.org/id/draft-kunze-bagit-11.txt">BagIt</a>
+ * section 4), minus any file name extensions.  Query parameters are disallowed in Bag URIs, as they have no semantic
+ * analog in the BagIt specification.
+ * </p>
+ *
+ * @see <a href="https://www.ietf.org/id/draft-kunze-bagit-11.txt">BagIt Draft Specification version 0.97, expires December 25, 2015</a>
+ * @see <a href="https://www.ietf.org/rfc/rfc2396.txt">RFC 2396: Uniform Resource Identifiers (URI): Generic Syntax</a>
+ * @see <a href="http://dataconservancy.org/placeholder/for/bagit/profile">Data Conservancy BagIt Profile 1.0, section X</a>
+ */
+public class BagUri {
+
+    /**
+     * The value of the Bag URI {@code scheme} (RFC 2396 sec. 3)
+     */
+    public static final String BAG_SCHEME = "bag";
+
+    /*
+     * Characters that are reserved (i.e. illegal) for URI authority portion (RFC 2396 sec. 3.2); currently not enforced.
+     */
+//    private static final char[] RESERVED_AUTHORITY_CHARACTERS = new char[] { ';', ':', '@', '?', '/' };
+
+    private static final String ERR_NULL = "Argument '%s' must not be null or empty.";
+
+    private static final String ERR_INVALID_SCHEME = "Invalid scheme '%s' for " + BagUri.class.getName() + ": scheme " +
+            "must be equal to '" + BAG_SCHEME + "'";
+
+    private static final String ERR_PARSE_URI = "Unable to parse URI string '%s': %s";
+
+    private static final String ERR_CREATE_URI = "Unable to construct a URI with scheme '%s', authority '%s', path '%s', and fragment '%s': %s";
+
+    /**
+     * Internal representation of the BagUri as a java.net.URI.
+     */
+    private URI bagUri;
+
+    /**
+     * The authority string (must not be {@code null}).  It semantically aligns with, and should be equal to, the name
+     * of the bag.  We keep this state for our own equals() and hashCode() implementation.
+     */
+    private String authority;
+
+    /**
+     * The path string (may be {@code null}).  We keep this state for our own equals() and hashCode() implementation.
+     */
+    private String path;
+
+    /**
+     * The fragment string (may be {@code null}).  We keep this state for our own equals() and hashCode()
+     * implementation.
+     */
+    private String fragment;
+
+    /**
+     * Constructs a new Bag URI, which addresses a resource in a Bag named by {@code authority}.
+     * <p>
+     * Exemplars (note that a non-empty {@code path} must begin with '/', as required by {@link URI}):<br/>
+     * <ul>
+     * <li>The {@code path} "/data" with {@code authority} "mybag" would address the data directory inside
+     * of a Bag named 'mybag': {@code bag://mybag/data}.</li>
+     * <li>The {@code path} "/bag-info.txt" would identify the Bag metadata file: {@code bag://mybag/bag-info.txt}.</li>
+     * <li>The {@code path} "/data/dataobject.rdf" with a {@code fragment} "obj-3" would identify a resource
+     * "{@code obj-3}" inside of the payload file {@code data/dataobject.rdf}:
+     * {@code bag://mybag/data/dataobject.rdf#obj-3}.</li>
+     * </ul>
+     * </p>
+     *
+     * @param authority the authority portion of the URI, which is expected to be the Bag name.  Must not be
+     *                  {@code null}.
+     * @param path the path to the resource within the Bag (may be {@code null}; if non-empty, must begin with '/')
+     * @param fragment an optional fragment identifier, without the leading '#', for referencing resources within a file
+     * @throws java.lang.IllegalArgumentException if any required parameters are {@code null} or invalid URI components.
+     */
+    public BagUri(String authority, String path, String fragment) {
+        if (authority == null || authority.trim().length() == 0) {
+            throw new IllegalArgumentException(String.format(ERR_NULL, "authority"));
+        }
+        try {
+            bagUri = new URI(BAG_SCHEME, authority, path, null, fragment);
+        } catch (URISyntaxException e) {
+            throw new IllegalArgumentException(
+                    String.format(ERR_CREATE_URI, BAG_SCHEME, authority, path, fragment, e.getMessage()), e);
+        }
+
+        this.authority = authority;
+        this.path = path;
+        this.fragment = fragment;
+    }
+
+    /**
+     * Obtain the authority component of this Bag URI, which is expected to be the name of the Bag.
+     * @return the authority component, as reported by the underlying {@link URI}
+     */
+    public String getAuthority() {
+        return bagUri.getAuthority();
+    }
+
+    /**
+     * Obtain the fragment component of this Bag URI, if one was supplied at construction.
+     * @return the decoded fragment component, as reported by {@link URI#getFragment()}
+     */
+    public String getFragment() {
+        return bagUri.getFragment();
+    }
+
+    /**
+     * Obtain the path component of this Bag URI, which addresses a resource within the Bag.
+     * @return the decoded path component, as reported by {@link URI#getPath()}
+     */
+    public String getPath() {
+        return bagUri.getPath();
+    }
+
+    public URI asUri() {
+        return bagUri;
+    }
+
+    /**
+     * {@inheritDoc}
+     * <p>
+     * Instances of this class are considered equal if their authority, path, and fragment components are equal.
+     * </p>
+     *
+     * @param o the object to determine equivalence against.
+     * @return {@code true} if the instances are equal, {@code false} otherwise
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+
+        BagUri bagUri = (BagUri) o;
+
+        if (authority != null ? !authority.equals(bagUri.authority) : bagUri.authority != null) return false;
+        if (fragment != null ? !fragment.equals(bagUri.fragment) : bagUri.fragment != null) return false;
+        if (path != null ? !path.equals(bagUri.path) : bagUri.path != null) return false;
+
+        return true;
+    }
+
+    @Override
+    public int hashCode() {
+        int result = authority != null ? authority.hashCode() : 0;
+        result = 31 * result + (path != null ? path.hashCode() : 0);
+        result = 31 * result + (fragment != null ? fragment.hashCode() : 0);
+        return result;
+    }
+}

From e0c38dbc7ef7671b623455116f7493ac62d2cb00 Mon Sep 17 00:00:00 2001
From: Elliot Metsger <emetsger@jhu.edu>
Date: Wed, 9 Sep 2015 11:35:41 -0400
Subject: [PATCH 3/9] DC-2101: Roles played by files or directories in a bag.

---
 .../bagit/support/BagFileRole.java            | 82 +++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/support/BagFileRole.java

diff --git a/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/support/BagFileRole.java b/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/support/BagFileRole.java
new file mode 100644
index 00000000..ec33bb84
--- /dev/null
+++ b/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/support/BagFileRole.java
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2015 Johns Hopkins University
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.dataconservancy.bagit.support;
+
+/**
+ * Roles of files and directories seen in a Bag.  Roles are defined independently of concrete files because: 1) a
+ * single role may be served by multiple files; 2) names of files that fulfill these roles may change.
+ * <p>
+ * While changing the name of the {@code bagit.txt} file is hard to imagine, it is reasonable that additional files may
+ * fulfill a particular role as the BagIt specification evolves.  If the name of {@code bagit.txt} does change, it is
+ * likely that the role of a bag declaration will continue to be needed, even if it is no longer fulfilled by
+ * {@code bagit.txt}.
+ * </p>
+ * <p>
+ * BagIt requires that there be a bag declaration and a payload manifest.  These roles are enumerated in this class as
+ * {@link #BAG_DECL} and {@link #PAYLOAD_MANIFEST}, respectively.  Other roles such as a tag manifest, payload
+ * directory, and fetch file are also enumerated in this class.  A payload manifest role may be fulfilled by two
+ * different files, a {@code manifest-sha1.txt} file containing SHA-1 checksums, and a {@code manifest-md5.txt} file
+ * containing MD5 checksums.  In the future, implementations may use SHA-256 or other algorithms.  Regardless of the
+ * name of future files, their role will be enumerated in this class.
+ * </p>
+ * <p>
+ * The documentation for each role includes example file names from the specification, which are informative.  They
+ * are meant as examples that aid comprehension of what the role represents; they are not normative.
+ * </p>
+ */
+public enum BagFileRole {
+
+    /**
+     * The bag payload (e.g. {@code data/}) directory.
+     */
+    PAYLOAD_DIRECTORY,
+
+    /**
+     * Bag payload itself (e.g. content in the {@code data/} directory).
+     */
+    PAYLOAD_CONTENT,
+
+    /**
+     * Tag file corresponding to the {@code bagit.txt} file, at the base of the bag.
+     */
+    BAG_DECL,
+
+    /**
+     * Tag file corresponding to the {@code bag-info.txt} file, at the base of the bag.
+     */
+    BAG_INFO,
+
+    /**
+     * Tag file(s) corresponding to the payload {@code manifest-<algorithm>.txt} file, at the base of the bag.
+     */
+    PAYLOAD_MANIFEST,
+
+    /**
+     * Tag file(s) corresponding to the {@code tagmanifest-<algorithm>.txt} file, at the base of the bag.
+     */
+    TAG_MANIFEST,
+
+    /**
+     * Tag file corresponding to the {@code fetch.txt} file, at the base of the bag.
+     */
+    FETCH,
+
+    /**
+     * Additional tag files that are not covered by the BagIt specification.
+     */
+    OTHER_TAG
+}

From 2715c44123c5d58cd05947a684080c8c54cceb8d Mon Sep 17 00:00:00 2001
From: Elliot Metsger <emetsger@jhu.edu>
Date: Wed, 9 Sep 2015 22:11:57 -0400
Subject: [PATCH 4/9] DC-2101: Initial Token class and tests.  Tokens are
 strings with special meaning used to make up expressions.

---
 .../dataconservancy/bagit/rules/Token.java    | 138 ++++++++++++++++++
 .../bagit/rules/TokenTest.java                |  92 ++++++++++++
 2 files changed, 230 insertions(+)
 create mode 100644 dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/Token.java
 create mode 100644 dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/TokenTest.java

diff --git a/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/Token.java b/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/Token.java
new file mode 100644
index 00000000..af8f7643
--- /dev/null
+++ b/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/Token.java
@@ -0,0 +1,138 @@
+/*
+ *
+ *  * Copyright 2015 Johns Hopkins University
+ *  *
+ *  * Licensed under the Apache License, Version 2.0 (the "License");
+ *  * you may not use this file except in compliance with the License.
+ *  * You may obtain a copy of the License at
+ *  *
+ *  *     http://www.apache.org/licenses/LICENSE-2.0
+ *  *
+ *  * Unless required by applicable law or agreed to in writing, software
+ *  * distributed under the License is distributed on an "AS IS" BASIS,
+ *  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  * See the License for the specific language governing permissions and
+ *  * limitations under the License.
+ *
+ */
+
+package org.dataconservancy.bagit.rules;
+
+import static org.dataconservancy.bagit.rules.Message.ERR_NULL;
+
+/**
+ * Tokens are strings that make up location expressions.  Location expressions are patterns that are matched against
+ * paths.  Location expressions are inspired by Apache Ant file pattern matching.
+ */
+enum Token {
+
+    /**
+     * A token matching exactly one character in an expression.
+     */
+    EXACTLY_ONE_CHARACTER("?"),
+
+    /**
+     * A token that will match multiple directory levels in an expression.
+     */
+    DIRECTORY("**"),
+
+    /**
+     * A token matching zero or more characters in an expression. <em>Must always be defined sometime after
+     * {@link #DIRECTORY}</em>
+     */
+    ZERO_OR_MORE_CHARACTERS("*"),
+
+
+    /**
+     * A token that separates path segments in an expression.
+     */
+    PATH_SEPARATOR("/"),
+
+    /**
+     * A special token with a {@code null} token string.  <em>Must always be defined last</em>
+     */
+    LITERAL();
+
+    private static final String ERR_MULTIPLE_TOKENS = "Candidate sequence '%s' contains multiple tokens.  " +
+            "Try splitting up the tokens and submitting the tokens one at a time.";
+
+    /**
+     * String representation of the token, if there is one.
+     */
+    private String tokenString;
+
+    /**
+     * Construct a Token with no string representation.  Currently reserved for {@link #LITERAL} tokens.
+     */
+    private Token() {
+        this.tokenString = null;
+    }
+
+    /**
+     * Construct a token with the supplied string representation.
+     *
+     * @param tokenString the string representation of the token.
+     * @throws java.lang.IllegalArgumentException if the {@code tokenString} is {@code null}
+     */
+    private Token(String tokenString) {
+        if (tokenString == null) {
+            throw new IllegalArgumentException(String.format(ERR_NULL, "tokenString"));
+        }
+        this.tokenString = tokenString;
+    }
+
+    /**
+     * Obtain the string form of the token, may be {@code null}.  {@link #LITERAL} tokens will not
+     * have a string form, because a literal is the set of characters that <em>do not</em> represent a token.
+     *
+     * @return the string form of the token, or {@code null} in the case of {@code LITERAL} tokens.
+     */
+    String getTokenString() {
+        return tokenString;
+    }
+
+    /**
+     * Attempts to parse a string which represents a <em>single</em> token into a {@code Token}
+     *
+     * @param candidate the candidate token string
+     * @return a {@code Token} if {@code candidate} represents a valid token
+     * @throws java.lang.IllegalArgumentException if {@code candidate} does not represent a valid token
+     */
+    static Token parse(CharSequence candidate) {
+        if (candidate == null) {
+            throw new IllegalArgumentException(String.format(ERR_NULL, "candidate"));
+        }
+
+        for (Token m : Token.values()) {
+
+            // See if the candidate token string equals the string representation
+            // of the token (except LITERAL), and return it
+            if (m.tokenString != null && m.tokenString.equals(candidate)) {
+                return m;
+            }
+
+            // Check to see if the candidate token string _contains_ the string representation
+            // of the token (except LITERAL).  If so, that means that the candidate contains multiple
+            // tokens, which isn't allowed.
+            if (candidate.length() > 1 &&
+                    m.tokenString != null &&
+                    candidate.chars().anyMatch(
+                            c -> m.tokenString.contains(Character.toString((char) c)))) {
+                throw new IllegalArgumentException(String.format(ERR_MULTIPLE_TOKENS, candidate));
+            }
+        }
+
+        // None of our Token string representations equaled the candidate string.
+        // The candidate string did not _contain_ any of the Token string representations
+        // We must be left with a LITERAL.
+
+        return LITERAL;
+    }
+
+    @Override
+    public String toString() {
+        return "Token{" +
+                "tokenString='" + tokenString + '\'' +
+                '}';
+    }
+}
diff --git a/dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/TokenTest.java b/dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/TokenTest.java
new file mode 100644
index 00000000..d81d2b9c
--- /dev/null
+++ b/dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/TokenTest.java
@@ -0,0 +1,92 @@
+/*
+ *
+ *  * Copyright 2015 Johns Hopkins University
+ *  *
+ *  * Licensed under the Apache License, Version 2.0 (the "License");
+ *  * you may not use this file except in compliance with the License.
+ *  * You may obtain a copy of the License at
+ *  *
+ *  *     http://www.apache.org/licenses/LICENSE-2.0
+ *  *
+ *  * Unless required by applicable law or agreed to in writing, software
+ *  * distributed under the License is distributed on an "AS IS" BASIS,
+ *  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  * See the License for the specific language governing permissions and
+ *  * limitations under the License.
+ *
+ */
+
+package org.dataconservancy.bagit.rules;
+
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Insures that the {@link Token} class properly parses tokens.
+ */
+public class TokenTest {
+
+    /**
+     * Tokens are strings with special meanings.  Insure all single-character tokens can be parsed.
+     */
+    @Test
+    public void testParseSingleCharacterString() throws Exception {
+        assertEquals(Token.ZERO_OR_MORE_CHARACTERS, Token.parse("*"));
+        assertEquals(Token.EXACTLY_ONE_CHARACTER, Token.parse("?"));
+        assertEquals(Token.LITERAL, Token.parse("f"));
+        assertEquals(Token.PATH_SEPARATOR, Token.parse("/"));
+    }
+
+    /**
+     * Tokens are strings with special meanings.  Insure all multi-character tokens can be parsed.
+     */
+    @Test
+    public void testParseMultipleCharacterStrings() throws Exception {
+        assertEquals(Token.DIRECTORY, Token.parse("**"));
+        assertEquals(Token.LITERAL, Token.parse("foobarbaz"));
+    }
+
+    /**
+     * Attempting to parse a string with multiple tokens is an error.
+     */
+    @Test(expected = IllegalArgumentException.class)
+    public void testParseSingleStringContainingDifferentTokens() throws Exception {
+        Token.parse("*/?**abc");
+    }
+
+    /**
+     * Attempting to parse a string with multiple tokens is an error. (Just another case similar to above)
+     */
+    @Test(expected = IllegalArgumentException.class)
+    public void testParseLiteralEndingWithPathSep() throws Exception {
+        Token.parse("directory/");
+    }
+
+    /**
+     * Zero length strings would be parsed as a literal.
+     */
+    @Test
+    public void testParseZeroLengthString() throws Exception {
+        assertEquals(Token.LITERAL, Token.parse(""));
+    }
+
+    /**
+     * Empty strings would be parsed as a literal.
+     */
+    @Test
+    public void testParseEmptyString() throws Exception {
+        assertEquals(Token.LITERAL, Token.parse(" "));
+    }
+
+    /**
+     * Parsing {@code null} results in an error
+     *
+     * @throws Exception
+     */
+    @Test(expected = IllegalArgumentException.class)
+    public void testParseNull() throws Exception {
+        assertEquals(Token.LITERAL, Token.parse(null));
+    }
+}
\ No newline at end of file

From 68b8f2eb8ca866f5236580c830f3f8008ff40199 Mon Sep 17 00:00:00 2001
From: Elliot Metsger <emetsger@jhu.edu>
Date: Thu, 10 Sep 2015 23:12:13 -0400
Subject: [PATCH 5/9] DC-2101: Added the concept of a BoundToken: a Token that
 is bound to a value.  BoundToken doesn't quite pass the smell test because it
 really only serves a purpose for LITERAL tokens.  The other tokens already
 have a value in their 'tokenString' field.

---
 .../bagit/rules/BoundToken.java               | 72 +++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/BoundToken.java

diff --git a/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/BoundToken.java b/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/BoundToken.java
new file mode 100644
index 00000000..9bff74fa
--- /dev/null
+++ b/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/BoundToken.java
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2015 Johns Hopkins University
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.dataconservancy.bagit.rules;
+
+/**
+ * Binds a {@link Token} to the string that it represents.  Most Tokens have their strings bound already, by the
+ * {@link org.dataconservancy.bagit.rules.Token#getTokenString()} method:
+ * <dl>
+ *     <dt>{@code PATH_SEPARATOR}:</dt><dd>{@code /}</dd>
+ *     <dt>{@code EXACTLY_ONE_CHARACTER}:</dt><dd>{@code ?}</dd>
+ *     <dt>{@code ZERO_OR_MORE_CHARACTERS}:</dt><dd>{@code *}</dd>
+ *     <dt>{@code DIRECTORY}:</dt><dd>{@code **}</dd>
+ * </dl>
+ * The exception is the {@link Token#LITERAL LITERAL token}, because it isn't known, <em>a priori</em>, what the
+ * literal characters will be.
+ * <p>
+ * Therefore this class is mostly redundant, and may fail the smell test, but it serves to bind the string
+ * representation to all Tokens, useful really for only the {@code LITERAL} token.
+ * </p>
+ */
+class BoundToken {
+
+    String bound;
+    Token token;
+
+    BoundToken(Token token, String toBind) {
+        this.token = token;
+        this.bound = toBind;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+
+        BoundToken that = (BoundToken) o;
+
+        if (bound != null ? !bound.equals(that.bound) : that.bound != null) return false;
+        if (token != that.token) return false;
+
+        return true;
+    }
+
+    @Override
+    public int hashCode() {
+        int result = bound != null ? bound.hashCode() : 0;
+        result = 31 * result + (token != null ? token.hashCode() : 0);
+        return result;
+    }
+
+    @Override
+    public String toString() {
+        return "BoundToken{" +
+                "bound='" + bound + '\'' +
+                ", token=" + token +
+                '}';
+    }
+}

From 0faf54a0fc32b4cb62ed2e9a9b7fe5c4eefe6674 Mon Sep 17 00:00:00 2001
From: Elliot Metsger <emetsger@jhu.edu>
Date: Thu, 10 Sep 2015 23:19:42 -0400
Subject: [PATCH 6/9] DC-2101: Additional parsing logic to Token, unit tests,
 javadoc  1. Added a parseString(...) method to Token, which will return a
 List<BoundToken> containing all the Tokens in the supplied String.  2.
 Changed the behavior of parse(...) to return BoundToken instead of Token.  3.
 Changed behavior of parse(...) to return a BoundToken for every literal
 character encountered.  Before it would return a single LITERAL token even if
 multiple literal characters were encountered.  The behavior of
 parseString(...) also returns a LITERAL BoundToken for each literal
 character.

The behaviors of parse(...) and parseString(...) differ, however, and this may be fixed in the future:
- parse("**") returns a single BoundToken(DIRECTORY, "**")
- parseString("**") returns a List<BoundToken> containing two BoundToken(ZERO_OR_MORE_CHARACTERS, "*")
---
 .../bagit/rules/BoundToken.java               |  28 +++++
 .../dataconservancy/bagit/rules/Token.java    |  41 +++++-
 .../bagit/rules/BoundTokensTestUtil.java      | 119 ++++++++++++++++++
 .../bagit/rules/TokenTest.java                | 108 ++++++++++++++--
 4 files changed, 281 insertions(+), 15 deletions(-)
 create mode 100644 dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/BoundTokensTestUtil.java

diff --git a/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/BoundToken.java b/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/BoundToken.java
index 9bff74fa..589bef0e 100644
--- a/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/BoundToken.java
+++ b/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/BoundToken.java
@@ -42,6 +42,34 @@ class BoundToken {
         this.bound = toBind;
     }
 
+    /**
+     * Return true if the value bound to this token is exactly one character.
+     *
+     * @return true if the bound value is exactly one character.
+     */
+    boolean isSingleChar() {
+        return bound.length() == 1;
+    }
+
+    /**
+     * Return the first character of the bound value; any characters after the first are not returned.
+     *
+     * @return the first character of the bound value.
+     */
+    char asChar() {
+        return bound.charAt(0);
+    }
+
+    /**
+     * Return the entire bound value as a character array.  Use this instead of
+     * {@link #asChar()} when {@link #isSingleChar()} is false.
+     *
+     * @return the bound value as a character array.
+     */
+    char[] asCharArray() {
+        return bound.toCharArray();
+    }
+
     @Override
     public boolean equals(Object o) {
         if (this == o) return true;
diff --git a/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/Token.java b/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/Token.java
index af8f7643..dc6b96b4 100644
--- a/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/Token.java
+++ b/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/Token.java
@@ -18,6 +18,10 @@
 
 package org.dataconservancy.bagit.rules;
 
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
 import static org.dataconservancy.bagit.rules.Message.ERR_NULL;
 
 /**
@@ -98,8 +102,8 @@ String getTokenString() {
      * @return a {@code Token} if {@code candidate} represents a valid token
      * @throws java.lang.IllegalArgumentException if {@code candidate} does not represent a valid token
      */
-    static Token parse(CharSequence candidate) {
-        if (candidate == null) {
+    static BoundToken parse(CharSequence candidate) {
+        if (candidate == null || candidate.length() == 0) {
             throw new IllegalArgumentException(String.format(ERR_NULL, "candidate"));
         }
 
@@ -108,7 +112,7 @@ static Token parse(CharSequence candidate) {
             // See if the candidate token string equals the string representation
             // of the token (except LITERAL), and return it
             if (m.tokenString != null && m.tokenString.equals(candidate)) {
-                return m;
+                return new BoundToken(m, candidate.toString());
             }
 
             // Check to see if the candidate token string _contains_ the string representation
@@ -126,7 +130,36 @@ static Token parse(CharSequence candidate) {
         // The candidate string did not _contain_ any of the Token string representations
         // We must be left with a LITERAL.
 
-        return LITERAL;
+        return new BoundToken(Token.LITERAL, candidate.toString());
+    }
+
+    /**
+     * Tokenize {@code candidate} one character at a time.  A character that equals the token string of a
+     * {@code Token} is bound to that token; every other character is bound to {@link #LITERAL}.
+     *
+     * @param candidate the characters to tokenize
+     * @return a List with one {@code BoundToken} per character of {@code candidate}, in order
+     * @throws java.lang.IllegalArgumentException if {@code candidate} is {@code null} or zero-length
+     */
+    static List<BoundToken> parseString(CharSequence candidate) {
+        if (candidate == null || candidate.length() == 0) {
+            throw new IllegalArgumentException(String.format(ERR_NULL, "candidate"));
+        }
+
+        return candidate.chars()
+                .mapToObj(c -> String.valueOf((char) c))
+                .map(s -> {
+                    // Bind the character to the first Token whose token string equals it, as parse(...)
+                    // does.  A Token with a null token string can never equal s, so it is skipped.
+                    for (Token t : Token.values()) {
+                        if (s.equals(t.getTokenString())) {
+                            return new BoundToken(t, s);
+                        }
+                    }
+                    // Nothing matched, so the character is a LITERAL.
+                    return new BoundToken(LITERAL, s);
+                })
+                .collect(Collectors.toList());
     }
 
     @Override
diff --git a/dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/BoundTokensTestUtil.java b/dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/BoundTokensTestUtil.java
new file mode 100644
index 00000000..6cfae0b4
--- /dev/null
+++ b/dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/BoundTokensTestUtil.java
@@ -0,0 +1,119 @@
+/*
+ *
+ *  * Copyright 2015 Johns Hopkins University
+ *  *
+ *  * Licensed under the Apache License, Version 2.0 (the "License");
+ *  * you may not use this file except in compliance with the License.
+ *  * You may obtain a copy of the License at
+ *  *
+ *  *     http://www.apache.org/licenses/LICENSE-2.0
+ *  *
+ *  * Unless required by applicable law or agreed to in writing, software
+ *  * distributed under the License is distributed on an "AS IS" BASIS,
+ *  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  * See the License for the specific language governing permissions and
+ *  * limitations under the License.
+ *  
+ */
+
+package org.dataconservancy.bagit.rules;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * BoundTokens that are shared across unit tests.
+ */
+class BoundTokensTestUtil {
+
+    /**
+     * The {@code BoundToken} version of {@link Token#ZERO_OR_MORE_CHARACTERS}
+     */
+    static final BoundToken ZERO_OR_MORE = new BoundToken(Token.ZERO_OR_MORE_CHARACTERS, "*");
+
+    /**
+     * The {@code BoundToken} version of {@link Token#ZERO_OR_MORE_CHARACTERS}, in a single element List
+     */
+    static final List<BoundToken> ZERO_OR_MORE_L = Arrays.asList(
+            new BoundToken(Token.ZERO_OR_MORE_CHARACTERS, "*"));
+
+    /**
+     * The {@code BoundToken} version of {@link Token#EXACTLY_ONE_CHARACTER}
+     */
+    static final BoundToken EXACTLY_ONE = new BoundToken(Token.EXACTLY_ONE_CHARACTER, "?");
+
+    /**
+     * The {@code BoundToken} version of {@link Token#EXACTLY_ONE_CHARACTER}, in a single element List
+     */
+    static final List<BoundToken> EXACTLY_ONE_L = Arrays.asList(
+            new BoundToken(Token.EXACTLY_ONE_CHARACTER, "?"));
+
+    /**
+     * The {@code BoundToken} version of {@link Token#PATH_SEPARATOR}
+     */
+    static final BoundToken PATH_SEP = new BoundToken(Token.PATH_SEPARATOR, "/");
+
+    /**
+     * The {@code BoundToken} version of {@link Token#PATH_SEPARATOR}, in a single element List
+     */
+    static final List<BoundToken> PATH_SEP_L = Arrays.asList(
+            new BoundToken(Token.PATH_SEPARATOR, "/"));
+
+    /**
+     * The {@code BoundToken} version of {@link Token#DIRECTORY}, as a single token bound to "**"
+     */
+    static final BoundToken DIR = new BoundToken(Token.DIRECTORY, "**");
+
+    /**
+     * The {@code BoundToken} version of {@link Token#DIRECTORY}, represented as a List containing two
+     * {@link #ZERO_OR_MORE} BoundTokens.
+     */
+    static final List<BoundToken> DIR_L = Arrays.asList(ZERO_OR_MORE, ZERO_OR_MORE);
+
+    /**
+     * Convenience method for creating a {@link Token#LITERAL literal} token for each character in {@code s}.  It does
+     * not evaluate the characters in {@code s} for whether or not they should actually be made literals.  That is the
+     * responsibility of the developer.  (For example, this method will happily make literal tokens of "*", "?", and
+     * "/", which are not allowed by {@link Token#parse(CharSequence)}.)
+     *
+     * @param s the string to represent as a List of BoundTokens
+     * @return a List containing LITERAL BoundTokens for each character in {@code s}
+     */
+    static List<BoundToken> literalsForString(String s) {
+        return s.chars()
+                .mapToObj(c -> new BoundToken(Token.LITERAL, String.valueOf((char) c)))
+                .collect(Collectors.toCollection(ArrayList::new));
+    }
+
+    /**
+     * Asserts that the values in the expected and actual Lists are equal.  This method will assert that
+     * the lists are the same size before comparing their values.
+     *
+     * @param expected the expected List of BoundTokens
+     * @param actual   the actual List of BoundTokens, normally representing a test result.
+     */
+    static void assertTokenListEquals(List<BoundToken> expected, List<BoundToken> actual) {
+        assertExpectedListCount(expected.size(), actual);
+        for (int i = 0; i < expected.size(); i++) {
+            assertEquals("Expected token: '" + expected.get(i) + "' but found '" + actual.get(i) + "'",
+                    expected.get(i), actual.get(i));
+        }
+    }
+
+    /**
+     * Asserts that the supplied list of BoundTokens has the expected count.
+     *
+     * @param expectedCount the expected number of BoundTokens in {@code actual}
+     * @param actual        a List of BoundTokens, normally representing the result of a test.
+     */
+    static void assertExpectedListCount(int expectedCount, List<BoundToken> actual) {
+        assertEquals("Expected " + expectedCount + " BoundTokens, found " + actual.size() + ": " +
+                        actual.stream().map(bt -> "['" + bt.token.name() + "', '" + bt.bound + "']")
+                                .collect(Collectors.joining(", ")),
+                expectedCount, actual.size());
+    }
+}
diff --git a/dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/TokenTest.java b/dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/TokenTest.java
index d81d2b9c..99db35dd 100644
--- a/dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/TokenTest.java
+++ b/dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/TokenTest.java
@@ -21,6 +21,17 @@
 
 import org.junit.Test;
 
+import java.util.Arrays;
+
+import static org.dataconservancy.bagit.rules.BoundTokensTestUtil.DIR;
+import static org.dataconservancy.bagit.rules.BoundTokensTestUtil.DIR_L;
+import static org.dataconservancy.bagit.rules.BoundTokensTestUtil.EXACTLY_ONE;
+import static org.dataconservancy.bagit.rules.BoundTokensTestUtil.EXACTLY_ONE_L;
+import static org.dataconservancy.bagit.rules.BoundTokensTestUtil.PATH_SEP;
+import static org.dataconservancy.bagit.rules.BoundTokensTestUtil.PATH_SEP_L;
+import static org.dataconservancy.bagit.rules.BoundTokensTestUtil.ZERO_OR_MORE;
+import static org.dataconservancy.bagit.rules.BoundTokensTestUtil.ZERO_OR_MORE_L;
+import static org.dataconservancy.bagit.rules.BoundTokensTestUtil.assertTokenListEquals;
 import static org.junit.Assert.assertEquals;
 
 /**
@@ -33,10 +44,18 @@ public class TokenTest {
      */
     @Test
     public void testParseSingleCharacterString() throws Exception {
-        assertEquals(Token.ZERO_OR_MORE_CHARACTERS, Token.parse("*"));
-        assertEquals(Token.EXACTLY_ONE_CHARACTER, Token.parse("?"));
-        assertEquals(Token.LITERAL, Token.parse("f"));
-        assertEquals(Token.PATH_SEPARATOR, Token.parse("/"));
+
+        // With parse(...)
+        assertEquals(ZERO_OR_MORE, Token.parse("*"));
+        assertEquals(EXACTLY_ONE, Token.parse("?"));
+        assertEquals(new BoundToken(Token.LITERAL, "f"), Token.parse("f"));
+        assertEquals(PATH_SEP, Token.parse("/"));
+
+        // With parseString(...)
+        assertTokenListEquals(ZERO_OR_MORE_L, Token.parseString("*"));
+        assertTokenListEquals(EXACTLY_ONE_L, Token.parseString("?"));
+        assertTokenListEquals(Arrays.asList(new BoundToken(Token.LITERAL, "f")), Token.parseString("f"));
+        assertTokenListEquals(PATH_SEP_L, Token.parseString("/"));
     }
 
     /**
@@ -44,32 +63,82 @@ public void testParseSingleCharacterString() throws Exception {
      */
     @Test
     public void testParseMultipleCharacterStrings() throws Exception {
-        assertEquals(Token.DIRECTORY, Token.parse("**"));
-        assertEquals(Token.LITERAL, Token.parse("foobarbaz"));
+        // With parse(...)
+        assertEquals(DIR, Token.parse("**"));
+        assertEquals(new BoundToken(Token.LITERAL, "foobarbaz"), Token.parse("foobarbaz"));
+
+        // With parseString(...)
+        assertTokenListEquals(DIR_L, Token.parseString("**"));
+        assertTokenListEquals(
+                Arrays.asList(new BoundToken(Token.LITERAL, "f"), new BoundToken(Token.LITERAL, "o"),
+                        new BoundToken(Token.LITERAL, "o"), new BoundToken(Token.LITERAL, "b"),
+                        new BoundToken(Token.LITERAL, "a"), new BoundToken(Token.LITERAL, "r"),
+                        new BoundToken(Token.LITERAL, "b"), new BoundToken(Token.LITERAL, "a"),
+                        new BoundToken(Token.LITERAL, "z")), Token.parseString("foobarbaz"));
     }
 
     /**
      * Attempting to parse a string with multiple tokens is an error.
+     * Legal with {@link #testParseStringSingleStringContainingDifferentTokens}.
      */
     @Test(expected = IllegalArgumentException.class)
     public void testParseSingleStringContainingDifferentTokens() throws Exception {
+        // With parse(...)
         Token.parse("*/?**abc");
     }
 
     /**
-     * Attempting to parse a string with multiple tokens is an error. (Just another case similar to above)
+     * Attempting to parseString a string with multiple tokens is ok.
+     * An error with {@link #testParseSingleStringContainingDifferentTokens()}
+     */
+    @Test
+    public void testParseStringSingleStringContainingDifferentTokens() throws Exception {
+        // With parseString(...)
+        assertTokenListEquals(Arrays.asList(ZERO_OR_MORE, PATH_SEP, EXACTLY_ONE, ZERO_OR_MORE, ZERO_OR_MORE,
+                new BoundToken(Token.LITERAL, "a"), new BoundToken(Token.LITERAL, "b"),
+                new BoundToken(Token.LITERAL, "c")), Token.parseString("*/?**abc"));
+    }
+
+    /**
+     * Attempting to parse a string with multiple tokens is an error.  Essentially the same test as
+     * {@link #testParseSingleStringContainingDifferentTokens()}.  Note this is legal with
+     * {@link #testParseStringLiteralEndingWithPathSep()}.
      */
     @Test(expected = IllegalArgumentException.class)
     public void testParseLiteralEndingWithPathSep() throws Exception {
+        // With parse(...)
         Token.parse("directory/");
     }
 
     /**
-     * Zero length strings would be parsed as a literal.
+     * Legal form of {@link #testParseLiteralEndingWithPathSep()}.
      */
     @Test
+    public void testParseStringLiteralEndingWithPathSep() throws Exception {
+        // With parseString(...)
+        assertTokenListEquals(Arrays.asList(new BoundToken(Token.LITERAL, "d"), new BoundToken(Token.LITERAL, "i"),
+                new BoundToken(Token.LITERAL, "r"), new BoundToken(Token.LITERAL, "e"),
+                new BoundToken(Token.LITERAL, "c"), new BoundToken(Token.LITERAL, "t"),
+                new BoundToken(Token.LITERAL, "o"), new BoundToken(Token.LITERAL, "r"),
+                new BoundToken(Token.LITERAL, "y"), PATH_SEP), Token.parseString("directory/"));
+    }
+
+    /**
+     * Parsing zero length strings results in an error.
+     */
+    @Test(expected = IllegalArgumentException.class)
     public void testParseZeroLengthString() throws Exception {
-        assertEquals(Token.LITERAL, Token.parse(""));
+        // With parse(...)
+        Token.parse("");
+    }
+
+    /**
+     * Parsing zero length strings results in an error.
+     */
+    @Test(expected = IllegalArgumentException.class)
+    public void testParseStringZeroLengthString() throws Exception {
+        // With parseString(...)
+        Token.parseString("");
     }
 
     /**
@@ -77,7 +146,11 @@ public void testParseZeroLengthString() throws Exception {
      */
     @Test
     public void testParseEmptyString() throws Exception {
-        assertEquals(Token.LITERAL, Token.parse(" "));
+        // With parse(...)
+        assertEquals(new BoundToken(Token.LITERAL, " "), Token.parse(" "));
+
+        // With parseString(...)
+        assertTokenListEquals(Arrays.asList(new BoundToken(Token.LITERAL, " ")), Token.parseString(" "));
     }
 
     /**
@@ -87,6 +160,19 @@ public void testParseEmptyString() throws Exception {
      */
     @Test(expected = IllegalArgumentException.class)
     public void testParseNull() throws Exception {
-        assertEquals(Token.LITERAL, Token.parse(null));
+        // With parse(...)
+        Token.parse(null);
+    }
+
+    /**
+     * Parsing {@code null} with parseString is also an error
+     *
+     * @throws Exception
+     */
+    @Test(expected = IllegalArgumentException.class)
+    public void testParseStringNull() throws Exception {
+        // With parseString(...)
+        Token.parseString(null);
     }
+
 }
\ No newline at end of file

From 1b0a6db796eb3223a8b3b8ae12dc34bc0afe64cb Mon Sep 17 00:00:00 2001
From: Elliot Metsger <emetsger@jhu.edu>
Date: Sun, 13 Sep 2015 11:19:01 -0400
Subject: [PATCH 7/9] DC-2101:  Initial Expression and ExpressionMatcher class,
 with tests and Javadoc.  Expression represents a path, or a pattern to match
 a path.  The ExpressionMatcher is responsible for matching a path Expression
 against a pattern Expression.  There is still some work to do here with
 regard to path separators, and tokenizing Expression strings that end with
 "/".

There is one unit test to resolve, and some more Javadoc to do, class/method level as well as package level.
---
 .../bagit/rules/Expression.java               | 141 ++++
 .../bagit/rules/ExpressionMatcher.java        | 701 ++++++++++++++++++
 .../bagit/rules/ExpressionMatcherTest.java    | 393 ++++++++++
 .../bagit/rules/ExpressionTest.java           | 101 +++
 4 files changed, 1336 insertions(+)
 create mode 100644 dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/Expression.java
 create mode 100644 dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/ExpressionMatcher.java
 create mode 100644 dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/ExpressionMatcherTest.java
 create mode 100644 dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/ExpressionTest.java

diff --git a/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/Expression.java b/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/Expression.java
new file mode 100644
index 00000000..4fcd862e
--- /dev/null
+++ b/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/Expression.java
@@ -0,0 +1,141 @@
+/*
+ *
+ *  * Copyright 2015 Johns Hopkins University
+ *  *
+ *  * Licensed under the Apache License, Version 2.0 (the "License");
+ *  * you may not use this file except in compliance with the License.
+ *  * You may obtain a copy of the License at
+ *  *
+ *  *     http://www.apache.org/licenses/LICENSE-2.0
+ *  *
+ *  * Unless required by applicable law or agreed to in writing, software
+ *  * distributed under the License is distributed on an "AS IS" BASIS,
+ *  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  * See the License for the specific language governing permissions and
+ *  * limitations under the License.
+ *
+ */
+
+package org.dataconservancy.bagit.rules;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * An Expression is a String that represents a hierarchical path.  An Expression may represent a path, or a pattern
+ * meant to match a path.
+ * <p>
+ * Even though a "path" and a "pattern" are both instances of an {@link Expression}, their semantics differ.  A
+ * "path" only contains literal and path separator tokens.  A "pattern" may contain literals, path separators, and
+ * matching tokens like '*' and '?'.  Path segments are the tokens between consecutive path separators, addressable
+ * by their zero-indexed {@link org.dataconservancy.bagit.rules.Expression#depth() depth}.  For example, the Expression
+ * '/foo/bar/baz.txt' has three path segments, 'foo' (depth = 0), 'bar' (depth = 1), and 'baz.txt' (depth = 2).  The
+ * depth of the Expression is 2.
+ * </p>
+ */
+public class Expression {
+
+    /**
+     * Tokens that make up this Expression, with the left-most token at the head of the list.
+     */
+    final private List<BoundToken> tokens;
+
+    /**
+     * Tokens that make up this Expression, except any leading or trailing path separator tokens are stripped.
+     * This is more amenable to streams operations.
+     */
+    final private List<BoundToken> sanitized;
+
+    /**
+     * Map of path segments, keyed by their depth.  A path segment is a List of BoundTokens that lie between
+     * consecutive path separators.  So a path segment will never contain a path separator character.
+     */
+    final private ConcurrentHashMap<Integer, List<BoundToken>> segments = new ConcurrentHashMap<>();
+
+    /**
+     * Creates a new {@code Expression} instance from the supplied string.  Normally an Expression represents a
+     * hierarchical path, so the supplied string will resemble a pattern matching a path, or an actual path.
+     *
+     * @param expression a string representing an expression.
+     */
+    public Expression(String expression) {
+        this.tokens = ExpressionTokenizer.tokenize(expression);
+        this.sanitized = new ArrayList<>(this.tokens);
+        if (!sanitized.isEmpty() && sanitized.get(0).token == Token.PATH_SEPARATOR) {
+            sanitized.remove(0);
+        }
+        // The list may be empty by now, e.g. when the only token was a leading path separator.
+        if (!sanitized.isEmpty() && sanitized.get(sanitized.size() - 1).token == Token.PATH_SEPARATOR) {
+            sanitized.remove(sanitized.size() - 1);
+        }
+    }
+
+    /**
+     * The entire list of tokens that make up this {@code Expression}, including all path separators.
+     *
+     * @return the tokens that make up this {@code Expression}
+     */
+    List<BoundToken> getTokens() {
+        return tokens;
+    }
+
+    /**
+     * A zero-based index representing the depth of the {@code Expression}.
+     * <dl>
+     *     <dt>{@code /}</dt><dd>depth == 0</dd>
+     *     <dt>{@code dir/}</dt><dd>depth == 0</dd>
+     *     <dt>{@code /dir}</dt><dd>depth == 0</dd>
+     *     <dt>{@code /dir/foo}</dt><dd>depth == 1</dd>
+     *     <dt>{@code /dir/foo/bar.txt}</dt><dd>depth == 2</dd>
+     *     <dt>{@code &#x2a;&#x2a;/&#x2a;.java}</dt><dd>depth == 1</dd>
+     * </dl>
+     *
+     * @return the depth of this {@code Expression}, always 0 or greater.
+     */
+    public int depth() {
+        return (int) sanitized.stream().filter(bt -> bt.token == Token.PATH_SEPARATOR).count();
+    }
+
+    /**
+     * A path segment is the run of tokens that occurs between two consecutive path separators.  This method obtains
+     * the tokens for the path segment at the specified {@code depth}.  Path separator tokens will not be included in
+     * the returned list.
+     *
+     * @param depth the zero-indexed depth of the path segment to retrieve
+     * @return the tokens making up the path segment, or an empty List if the depth is out of bounds
+     */
+    public List<BoundToken> getPathSegment(int depth) {
+        return segments.computeIfAbsent(depth, (d) -> {
+            List<BoundToken> pathSegments = new ArrayList<>();
+            int i = 0;
+            for (BoundToken t : sanitized) {
+                if (i > d) {
+                    // done recording tokens, break
+                    break;
+                }
+
+                if (t.token == Token.PATH_SEPARATOR) {
+                    // increment depth
+                    i++;
+                    // continue, we don't record path separators
+                    continue;
+                }
+
+
+                if (d - i == 0) {
+                    // record the token
+                    pathSegments.add(t);
+                }
+            }
+
+            return pathSegments;
+        });
+    }
+
+    @Override
+    public String toString() {
+        return tokens.stream()
+                .collect(StringBuilder::new, (s, bt) -> s.append(bt.bound), StringBuilder::append).toString();
+    }
+}
diff --git a/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/ExpressionMatcher.java b/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/ExpressionMatcher.java
new file mode 100644
index 00000000..6aea666f
--- /dev/null
+++ b/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/ExpressionMatcher.java
@@ -0,0 +1,701 @@
+/*
+ *
+ *  * Copyright 2015 Johns Hopkins University
+ *  *
+ *  * Licensed under the Apache License, Version 2.0 (the "License");
+ *  * you may not use this file except in compliance with the License.
+ *  * You may obtain a copy of the License at
+ *  *
+ *  *     http://www.apache.org/licenses/LICENSE-2.0
+ *  *
+ *  * Unless required by applicable law or agreed to in writing, software
+ *  * distributed under the License is distributed on an "AS IS" BASIS,
+ *  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  * See the License for the specific language governing permissions and
+ *  * limitations under the License.
+ *
+ */
+
+package org.dataconservancy.bagit.rules;
+
+import java.util.List;
+
+/**
+ * Responsible for matching an Expression representing a path against a pattern.  This is quite possibly the most
+ * heavy-weight string parsing library you'll ever encounter.  It is inspired by Ant-style pattern matching, and
+ * attempts to follow the same rules as the Ant <a href="https://ant.apache.org/manual/dirtasks.html">
+ * implementation</a>:
+ * <p>
+ * <pre>
+ * These patterns look very much like the patterns used in DOS and UNIX:
+ *
+ * '*' matches zero or more characters, '?' matches one character.
+ *
+ * In general, patterns are considered relative paths, relative to a task dependent base directory (the dir attribute in
+ * the case of &lt;fileset&gt;). Only files found below that base directory are considered. So while a pattern like
+ * ../foo.java is possible, it will not match anything when applied since the base directory's parent is never scanned
+ * for files.
+ *
+ * Examples:
+ *
+ * &#x2a;.java  matches  .java, x.java and FooBar.java, but not FooBar.xml (does not end with .java).
+ *
+ * ?.java  matches  x.java, A.java, but not .java or xyz.java (both don't have one character before .java).
+ *
+ * Combinations of *'s and ?'s are allowed.
+ *
+ * Matching is done per-directory. This means that first the first directory in the pattern is matched against the first
+ * directory in the path to match. Then the second directory is matched, and so on. For example, when we have the
+ * pattern /?abc&#x2f;&#x2a;&#x2f;&#x2a;.java and the path /xabc/foobar/test.java, the first ?abc is matched with
+ * xabc, then * is matched with foobar, and finally *.java is matched with test.java. They all match, so the path
+ * matches the pattern.
+ *
+ * To make things a bit more flexible, we add one extra feature, which makes it possible to match multiple directory
+ * levels. This can be used to match a complete directory tree, or a file anywhere in the directory tree. To do this,
+ * &#x2a;&#x2a; must be used as the name of a directory. When &#x2a;&#x2a; is used as the name of a directory in the
+ * pattern, it matches zero or more directories. For example: /test/&#x2a;&#x2a; matches all files/directories under
+ * &#x2f;test/, such as /test/x.java, or /test/foo/bar/xyz.html, but not /xyz.xml.
+ *
+ * There is one "shorthand": if a pattern ends with / or \, then &#x2a;&#x2a; is appended. For example,
+ * mypackage/test/ is interpreted as if it were mypackage/test/&#x2a;&#x2a;.
+ * </pre>
+ * </p>
+ * <p>
+ * Even though a "path" and a "pattern" are both instances of an {@link Expression}, their semantics differ.  A
+ * "path" only contains literal and path separator tokens.  A "pattern" may contain literals, path separators, and
+ * matching tokens like '*' and '?'.  Path segments are the tokens between consecutive path separators, addressable
+ * by their zero-indexed {@link org.dataconservancy.bagit.rules.Expression#depth() depth}.  For example, the Expression
+ * '/foo/bar/baz.txt' has three path segments, 'foo' (depth = 0), 'bar' (depth = 1), and 'baz.txt' (depth = 2).  The
+ * depth of the Expression is 2.
+ * </p>
+ * <p>
+ * Note that methods on this class are package-private, and are not meant to be exposed publicly.
+ * </p>
+ */
+public class ExpressionMatcher {
+
+    /**
+     * Convenience reference to a {@code BoundToken} that matches zero or more characters (i.e. '*').
+     * See {@link #zero_plus} for the {@code char} analog.
+     */
+    private static final BoundToken ZERO_OR_MORE = new BoundToken(Token.ZERO_OR_MORE_CHARACTERS,
+            Token.ZERO_OR_MORE_CHARACTERS.getTokenString());
+
+    /**
+     * Convenience reference to a {@code BoundToken} that matches exactly one character (i.e. '?').
+     * See {@link #exactly_one} for the {@code char} analog.
+     */
+    private static final BoundToken EXACTLY_ONE = new BoundToken(Token.EXACTLY_ONE_CHARACTER,
+            Token.EXACTLY_ONE_CHARACTER.getTokenString());
+
+    /**
+     * The {@code char} analog of {@link #EXACTLY_ONE}
+     */
+    private final char exactly_one;
+
+    /**
+     * The {@code char} analog of {@link #ZERO_OR_MORE}
+     */
+    private final char zero_plus;
+
+    /**
+     * Creates a matcher, caching the {@code char} form of the single-character matching tokens.
+     * TODO: probably could be private and methods be made static.
+     *
+     * @throws RuntimeException if a matching token's string is not exactly one character long
+     */
+    ExpressionMatcher() {
+        // Guard first, then assign: each matching token must bind exactly one character.
+        if (!EXACTLY_ONE.isSingleChar()) {
+            throw new RuntimeException("Implementation doesn't handle multi-character token: " +
+                    Token.EXACTLY_ONE_CHARACTER);
+        }
+        exactly_one = EXACTLY_ONE.asChar();
+
+        if (!ZERO_OR_MORE.isSingleChar()) {
+            throw new RuntimeException("Implementation doesn't handle multi-character token: " +
+                    Token.ZERO_OR_MORE_CHARACTERS);
+        }
+        zero_plus = ZERO_OR_MORE.asChar();
+    }
+
+    /**
+     * Match the supplied path against the pattern.  Matching is applied 'per-directory' as described
+     * {@link org.dataconservancy.bagit.rules.ExpressionMatcher above}.  This is the main entry point into the pattern
+     * matching logic.
+     *
+     * @param pattern the pattern meant to match a path
+     * @param path the path to match against the pattern; rejected (false) if it does not pass {@code isPath}
+     * @return true if the pattern matches; false otherwise, including when the pattern is deeper than the path
+     */
+    boolean match(Expression pattern, Expression path) {
+
+        // the path should just be made up of path separators and literals
+        if (!isPath(path.getTokens())) {
+            return false;  // probably should be an IAE
+        }
+
+        if (pattern.depth() > path.depth()) {
+            // if the pattern depth is greater than the path we're supposed to be matching,
+            // then we can't match, so short-circuit
+            return false;  // probably should be an IAE
+        }
+
+        if (pattern.depth() == path.depth()) {
+            boolean match = true;
+            // depths align: match segment i of the pattern against segment i of the path ('&=' evaluates every pair)
+            for (int i = 0; i <= pattern.depth(); i++) {
+                match &= match(pattern.getPathSegment(i), path.getPathSegment(i));
+            }
+
+            return match;
+        }
+        // the path is deeper than the pattern: start both at depth 0 and delegate to matchPathSegment
+        int pathOff = 0;
+        int expOff = 0;
+        int nextLiteral = nextLiteral(pattern, expOff);
+
+        return matchPathSegment(pattern, path, expOff, pathOff, nextLiteral);
+    }
+
+    /**
+     * Recursively matches the path segments of {@code path}, starting from {@code pathDepth}, against the path
+     * segments of {@code pattern}, starting from {@code patternDepth}.  The {@code nextLiteral} parameter contains the
+     * depth of the next path segment in {@code pattern} containing a literal (or -1 if there isn't any).
+     *
+     * @param pattern the expression containing matching tokens (i.e. pattern semantics)
+     * @param path the expression containing only literals or path separators (i.e. path semantics)
+     * @param patternDepth the depth to begin matching the pattern
+     * @param pathDepth the depth to begin matching the path
+     * @param nextLiteral the depth of the next pattern segment that contains a literal, or -1 if it doesn't exist
+     * @return true if all of the segments (starting from pathDepth) in the path can be matched in the pattern (starting
+     *         from patternDepth)
+     */
+    private boolean matchPathSegment(Expression pattern, Expression path, int patternDepth, int pathDepth,
+                                     int nextLiteral) {
+
+        if (nextLiteral == -1) {
+            // we're out of literals: the current pattern segment has to match every path segment that remains
+            boolean remainder = true;
+            for (int depth = pathDepth; depth <= path.depth(); depth++) {
+                remainder = match(pattern.getPathSegment(patternDepth), path.getPathSegment(depth)) && remainder;
+            }
+            return remainder;
+        }
+
+        // The right anchor is the first path segment, at or after pathDepth, that is matched by the pattern
+        // segment containing literals.
+        final List<BoundToken> literalSegment = pattern.getPathSegment(nextLiteral);
+        final int rightAnchor = nextMatch(path, pathDepth, literalSegment);
+
+        if (rightAnchor == -1) {
+            // no path segment matches the literals, so the path can't match
+            return false;
+        }
+
+        // Every path segment from the left anchor (pathDepth) up to, but excluding, the right anchor must match
+        // the current pattern segment.
+        boolean upToAnchor = true;
+        for (int depth = pathDepth; depth < rightAnchor; depth++) {
+            upToAnchor = match(pattern.getPathSegment(patternDepth), path.getPathSegment(depth)) && upToAnchor;
+        }
+
+        if (!upToAnchor) {
+            return false;
+        }
+
+        // they match up to the anchor, so keep going: resume at the anchor with the next pattern segment, and look
+        // for the next literal beyond the one that was just used
+        return matchPathSegment(pattern, path, patternDepth + 1, rightAnchor,
+                nextLiteral(pattern, nextLiteral + 1));
+    }
+
+    /**
+     * Finds the first path segment of {@code pattern}, at or after {@code depth}, that contains a literal.  Useful for
+     * finding the depth of path segment 'Foo??.java' in the pattern expression '&#x2a;&#x2a;/Foo??.java'.
+     *
+     * @param pattern an expression with pattern semantics
+     * @param depth the depth to begin searching from
+     * @return the index of the next path segment (i.e. depth) that contains literals, or -1 if not found
+     */
+    int nextLiteral(Expression pattern, int depth) {
+        int candidate = depth;
+
+        // examine each segment in turn; a starting depth beyond the deepest segment skips the loop entirely
+        while (candidate <= pattern.depth()) {
+            if (containsLiterals(pattern.getPathSegment(candidate))) {
+                return candidate;
+            }
+            candidate++;
+        }
+
+        return -1;
+    }
+
+    /**
+     * Matches {@code pattern} against the path segments of {@code path}, one segment at a time, starting from
+     * {@code path.getPathSegment(depth)} and stopping at the first segment that matches.  The {@code path} is an
+     * {@code Expression} with path semantics (i.e. only containing literals and path separators).
+     *
+     * @param path an Expression with path semantics
+     * @param depth the depth of the expression to begin matching from
+     * @param pattern the pattern each path segment of {@code path} is matched against.
+     * @return the index of the first path segment (i.e. depth) that matched {@code pattern}, or -1 if no match
+     */
+    int nextMatch(Expression path, int depth, List<BoundToken> pattern) {
+        int candidate = depth;
+
+        // skip over the segments that don't match
+        while (candidate <= path.depth() && !match(pattern, path.getPathSegment(candidate))) {
+            candidate++;
+        }
+        return (candidate <= path.depth()) ? candidate : -1;  // past the deepest segment means no match
+    }
+
+    /**
+     * Matches one path segment of a pattern against one path segment of a path.  Each List is expected to be a
+     * path segment; that is, a List will contain all BoundTokens between two consecutive path separators, not
+     * including the separators.  Therefore the path segment will not ever contain a path separator ('/'), nor
+     * should it contain a directory match token ('**').
+     * <p>
+     * Essentially this method is evaluating a <em>pattern</em> that may contain literals, '*', and '?' against a
+     * string of literals.
+     * </p>
+     *
+     * @param patternPathSegment the pattern
+     * @param pathPathSegment    the string (i.e. path) to match the pattern against
+     * @return true if the pattern matches the path
+     */
+    boolean match(List<BoundToken> patternPathSegment, List<BoundToken> pathPathSegment) {
+
+        // Short-circuit cases, checked in this order:
+        //  1. the pattern is a lone '*' ; it doesn't matter what the path segment has, all tokens match
+        //  2. the pattern is a lone '?' and the path segment has a single token ; the single token matches
+        //  3. the pattern is all literals ; see if the path segment has equal tokens
+
+        final boolean loneTokenMatches = isZeroOrMore(patternPathSegment)
+                || (pathPathSegment.size() == 1 && isExactlyOne(patternPathSegment));
+
+        if (loneTokenMatches) {
+            return true;
+        }
+
+        if (allLiterals(patternPathSegment)) {
+            return tokenEquals(patternPathSegment, pathPathSegment);
+        }
+
+        // Otherwise the pattern has at least one token that isn't a literal, and isn't a lone '*' (or a lone '?'
+        // facing a single-token path segment); typically it mixes literals with '*' or '?'.  Both segments are
+        // flattened to character sequences and matched character-wise.
+
+        final CharSequence pattern = toCharSeq(patternPathSegment);
+        final CharSequence path = toCharSeq(pathPathSegment);
+
+        // Matching starts at the beginning of both sequences: the first token and the first literal are located
+        // from index 0 of the pattern, while the forward path index and the left anchor both start at 0.
+        final int start = 0;
+
+        final int firstToken = findNextToken(pattern, start);
+        final int firstLiteral = findNextLiteral(pattern, start);
+
+        return match(pattern, path, start, firstToken, firstLiteral, start);
+    }
+
+    /**
+     * A recursive method for matching a {@code path} against a {@code pattern}.  The method terminates when there are
+     * no more literals or tokens to be matched, or as soon as it determines a match isn't possible and returns early.
+     * <p>
+     * Developers, when reading this implementation, keep in mind that anchors are always indexes into the {@code path},
+     * while {@code tokenIndex} and {@code literalIndex} are always indexes into {@code pattern}. The first major
+     * decision made is whether the method is attempting to match a token (e.g. '?' in "Foo??.java") or match a literal
+     * (e.g. "Foo", ".java" in "Foo??.java").
+     * </p>
+     * <p>
+     * When matching a token, the first decision to make is whether you are going to match forward from the current
+     * token, or work backward from the end of the pattern.  When matching a literal, the objective is to determine the
+     * anchors of the literal in the path and attempt to match it against the pattern.
+     * </p>
+     *
+     * @param pattern the pattern to match against
+     * @param path the path to match
+     * @param fPathIndex the index into the {@code path} up to which the path has been matched
+     * @param tokenIndex the index into {@code pattern} of the next token to be matched
+     * @param literalIndex the index into the {@code pattern} of the next literal to be matched
+     * @param leftAnchor overwritten with {@code fPathIndex} in the token branch and the main literal branch; TODO remove
+     * @return true if {@code path} matches {@code pattern}
+     */
+    private boolean match(CharSequence pattern, CharSequence path, int fPathIndex, int tokenIndex, int literalIndex, int leftAnchor) {
+        // Index description:
+        // - fPathIndex, left and right anchors are always indexes in the path
+        // - token and literal are always indexes in the pattern.
+
+
+        int rightAnchor = Integer.MIN_VALUE;
+
+        if (tokenIndex == Integer.MAX_VALUE && literalIndex == Integer.MAX_VALUE) {
+            // no tokens and no literals remain in the pattern, so we've matched everything
+            return true;
+        }
+
+        if (tokenIndex < literalIndex) {
+
+            // We are matching a token (because tokenIndex < literalIndex)
+            //
+            // If we are matching the last token in the pattern, we work backward in the path.
+            // If we are matching a token, and there are still more tokens left, we work forward in the path.
+            //
+            // - Find the left and right anchors in the path.
+            //   - Find right anchor
+            //     - Find the next literal in the pattern (using the literalIndex, and the [end of string|next token index])
+            //     - Match that literal in the path (from offset fPathIndex)
+            //     - Set the right anchor at the start of the literal.
+            // - Find left anchor
+            //   - Equal to the forward path index (fPathIndex)
+            //
+            // - If the token is a '*', we match.
+            // - If the token is a '?', and rightAnchor - leftAnchor == 1, we match.
+            //
+            // - If we match:
+            //   - set the fPathIndex to the rightAnchor (because fPathIndex keeps track of what we've matched in the path)
+            //   - set the next token index (or Integer.MAX_VALUE if the pattern is exhausted, or out of tokens)
+            //   - leave literalIndex alone, because we didn't match a literal this go-around, we matched a token.
+
+            // Find the right anchor.
+            int nextTokenIndex = findNextToken(pattern, tokenIndex + 1);
+
+            leftAnchor = fPathIndex;
+
+            CharSequence literal = null;
+
+            if (nextTokenIndex != Integer.MAX_VALUE && nextTokenIndex != Integer.MAX_VALUE) {
+                // we are not at the last token, work forward (NOTE(review): the condition above repeats one test twice)
+                // in the case of consecutive tokens (e.g. "??"), the literalIndex will be greater than the nextTokenIndex
+                int literalLen = Math.max(nextTokenIndex - literalIndex, 1);  // never below 1, so the "" arm below is dead
+                literal = (literalLen <= 0) ? "" : pattern.subSequence(literalIndex, literalIndex + literalLen);
+
+                if ((rightAnchor = matchNextLiteral(path, fPathIndex, literal)) == Integer.MIN_VALUE) {
+                    // we didn't match the literal in the path, so we won't match
+                    return false;
+                }
+            } else {
+                // we are at the last token, work backward from the end of the pattern by matching the literal at
+                // the end of the pattern, then checking the remaining characters in the path with the pattern token
+
+                // the special case is if the token we are matching is the last character of the pattern, in which
+                // case there won't be a literal to match.  in this case, the right anchor will be set to
+                // the end of the path.
+
+                if (tokenIndex == pattern.length() - 1) {
+                    rightAnchor = path.length();
+                } else {
+                    literal = pattern.subSequence(tokenIndex + 1, pattern.length());
+
+                    // if we don't match the literal in the path, then we don't match
+                    if ((rightAnchor = matchNextLiteral(path, fPathIndex, literal)) == Integer.MIN_VALUE) {
+                        return false;
+                    }
+                }
+            }
+
+            // - If the token is a '*', we match.
+            // - If the token is a '?', and rightAnchor - leftAnchor == 1, we match.
+            // NOTE(review): nextTokenIndex is a pattern index, rightAnchor a path index - confirm the comparison below
+            // if the next token is inside of the right anchor, we have multiple tokens (e.g. '??') in a row.
+            if (pattern.charAt(tokenIndex) == exactly_one) {
+                if (nextTokenIndex < rightAnchor) {
+                    if (pattern.subSequence(tokenIndex, literalIndex).chars().allMatch(c -> ((char) c) == '?')) {
+                        fPathIndex = ++leftAnchor;
+                        tokenIndex = findNextToken(pattern, tokenIndex + 1);
+                        return match(pattern, path, fPathIndex, tokenIndex, literalIndex, leftAnchor);
+                    } else {
+                        return false;
+                    }
+                }
+
+                if (rightAnchor - leftAnchor == 1) {
+                    fPathIndex = rightAnchor;
+                    tokenIndex = findNextToken(pattern, tokenIndex + 1);
+                    return match(pattern, path, fPathIndex, tokenIndex, literalIndex, leftAnchor);
+                } else {
+                    return false;
+                }
+            }
+
+            if (pattern.charAt(tokenIndex) == zero_plus) { //||
+                //pattern.charAt(tokenIndex) == exactly_one && (rightAnchor - leftAnchor == 1) // ) {
+                //      || ((nextTokenIndex < rightAnchor) && pattern.subSequence(tokenIndex, literalIndex).chars().allMatch(c -> ((char) c) == '?'))) {
+
+                // - If we match:
+                //   - set the fPathIndex to the rightAnchor (because fPathIndex keeps track of what we've matched in the path)
+                //   - set the next token index (or Integer.MAX_VALUE if the pattern is exhausted, or out of tokens)
+                //   - leave literalIndex alone, because we didn't match a literal this go-around, we matched a token.
+
+                fPathIndex = rightAnchor;
+                tokenIndex = findNextToken(pattern, tokenIndex + 1);
+
+                return match(pattern, path, fPathIndex, tokenIndex, literalIndex, leftAnchor);
+
+            } else {
+                return false;
+            }
+
+        } else if (literalIndex < tokenIndex) {
+
+            // We are matching a literal (because literalIndex < tokenIndex)
+            CharSequence literalToMatch;
+
+            if (literalIndex == Integer.MIN_VALUE) {
+                // out of literals, match the last token.  NOTE(review): findNextLiteral never yields MIN_VALUE - reachable?
+                rightAnchor = path.length();
+                if (pattern.charAt(tokenIndex) == zero_plus ||
+                        pattern.charAt(tokenIndex) == exactly_one && rightAnchor - leftAnchor == 1) {
+                    return true;
+                } else {
+                    return false;
+                }
+            } else {
+                leftAnchor = fPathIndex;
+
+                // if we can't find the right anchor, then we can't match.
+                if ((rightAnchor = findRightAnchor(pattern, path, fPathIndex, literalIndex)) == Integer.MAX_VALUE) {
+                    return false;
+                }
+
+                literalToMatch = pattern.subSequence(literalIndex, Math.min(tokenIndex, pattern.length()));
+            }
+
+            // does the literal in the pattern match the literal between the anchors?
+            if (path.subSequence(leftAnchor, rightAnchor).equals(literalToMatch)) {
+
+                // - If we match:
+                //   - set the fPathIndex to the rightAnchor (because fPathIndex keeps track of what we've matched in the path)
+                //   - leave the next token index alone, because we didn't match a token this go around, we matched a literal
+                //   - set the literal index to the beginning of the next literal
+
+                fPathIndex = rightAnchor;
+                literalIndex = findNextLiteral(pattern, literalIndex + literalToMatch.length());
+
+                return match(pattern, path, fPathIndex, tokenIndex, literalIndex, leftAnchor);
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Scans {@code pattern}, starting from {@code offset}, for the next occurrence of a token.
+     * <p>
+     * Remember that even though paths and patterns are both instances of {@link Expression}, the semantics of
+     * a 'path' are that it contains only literals and path separators, and differs from a 'pattern' which can contain
+     * matching tokens like '*' and '?'.
+     * </p>
+     *
+     * @param pattern the pattern to search through
+     * @param offset the offset into pattern to start searching from
+     * @return the offset in the pattern with the next occurrence of a token, or {@code Integer.MAX_VALUE} if not
+     *         found, or if {@code offset} is negative or not inside of {@code pattern}.
+     * @see #findNextLiteral(CharSequence, int)
+     */
+    int findNextToken(CharSequence pattern, int offset) {
+        final int end = pattern.length();
+
+        // a negative offset is rejected; an offset at or beyond the end leaves nothing to scan
+        int idx = (offset < 0) ? end : offset;
+        while (idx < end) {
+            final char candidate = pattern.charAt(idx);
+            if (candidate == exactly_one || candidate == zero_plus) {
+                return idx;
+            }
+            idx++;
+        }
+        return Integer.MAX_VALUE;
+    }
+
+    /**
+     * Scans {@code pattern}, starting from {@code offset}, for the next character that is not a token.
+     * <p>
+     * Remember that even though paths and patterns are both instances of {@link Expression}, the semantics of
+     * a 'path' are that it contains only literals and path separators, and differs from a 'pattern' which can contain
+     * matching tokens like '*' and '?'.
+     * </p>
+     *
+     * @param pattern the pattern to search through
+     * @param offset the offset into pattern to start searching from
+     * @return the offset in the pattern with the next occurrence of a literal, or {@code Integer.MAX_VALUE} if not
+     *         found, or if {@code offset} is negative or not inside of {@code pattern}.
+     * @see #findNextToken(CharSequence, int)
+     */
+    int findNextLiteral(CharSequence pattern, int offset) {
+        final int end = pattern.length();
+
+        // a negative offset is rejected; an offset at or beyond the end leaves nothing to scan
+        int idx = (offset < 0) ? end : offset;
+        while (idx < end) {
+            final char candidate = pattern.charAt(idx);
+            if (!(candidate == exactly_one || candidate == zero_plus)) {
+                return idx;
+            }
+            idx++;
+        }
+        return Integer.MAX_VALUE;
+    }
+
+    /**
+     * Searches the path for the first occurrence of the literal, at or after the supplied offset, and returns the
+     * offset in the path where that occurrence begins.  An empty literal is found at {@code offset}.
+     *
+     * @param path the path being searched for a literal string
+     * @param offset the offset in path to start searching from
+     * @param literal the literal string to find
+     * @return the offset of {@code literal} in {@code path}, or {@code Integer.MIN_VALUE} if not found
+     */
+    int matchNextLiteral(CharSequence path, int offset, CharSequence literal) {
+        if (offset < 0 || offset >= path.length() || path.length() == 0) {
+            return Integer.MIN_VALUE;
+        }
+
+        CharSequence sub = path.subSequence(offset, path.length());
+
+        int litIdx = 0;
+        int subIdx = 0;
+        while (litIdx < literal.length() && subIdx < sub.length()) {
+            if (literal.charAt(litIdx) == sub.charAt(subIdx)) {
+                // increment literal index if there's a match
+                litIdx++;
+            } else {
+                // a partial match failed: rewind to the character after the one it started on (the increment
+                // below completes the move), otherwise an occurrence overlapping the partial match is missed
+                subIdx -= litIdx;
+                litIdx = 0;
+            }
+
+            subIdx++; // always increment the substring index
+        }
+
+        // we matched the literal if the literal index is the same as the length of its CharSequence
+        if (literal.length() - litIdx == 0) {
+            // the literal begins literal.length() characters before subIdx, which is relative to offset
+            return offset + (subIdx - literal.length());
+        }
+
+        return Integer.MIN_VALUE;  // literal wasn't found.
+    }
+
+    /**
+     * Returns true if the path segment contains a single token, and the token is a
+     * {@link Token#ZERO_OR_MORE_CHARACTERS}.
+     * @param pathSegment the path segment containing arbitrary tokens
+     * @return true if the single token in the path segment is a {@code ZERO_OR_MORE_CHARACTERS} token.
+     */
+    private boolean isZeroOrMore(List<BoundToken> pathSegment) {
+        return pathSegment.size() == 1 && Token.ZERO_OR_MORE_CHARACTERS == pathSegment.get(0).token;
+    }
+
+    /**
+     * Answers whether the path segment is made up of one lone {@link Token#EXACTLY_ONE_CHARACTER} token.
+     *
+     * @param pathSegment the path segment containing arbitrary tokens
+     * @return true if the path segment has exactly one token, and it is a {@code EXACTLY_ONE_CHARACTER} token.
+     */
+    private boolean isExactlyOne(List<BoundToken> pathSegment) {
+        return pathSegment.size() == 1 && Token.EXACTLY_ONE_CHARACTER == pathSegment.get(0).token;
+    }
+
+    /**
+     * Answers a {@code CharSequence} made by appending each of the supplied {@code tokens}, in list order.
+     * @param tokens a List of arbitrary tokens
+     * @return the sequence of token values
+     */
+    private CharSequence toCharSeq(List<BoundToken> tokens) {
+        final StringBuilder joined = new StringBuilder();
+        tokens.forEach(joined::append);
+        return joined;
+    }
+
+    /**
+     * Compares two lists of tokens position by position.  The lists are equal when they are the same size and every
+     * pair of tokens is {@link org.dataconservancy.bagit.rules.BoundToken#equals(Object) equal}.
+     *
+     * @param one the first list of arbitrary tokens
+     * @param two the second list of arbitrary tokens
+     * @return true if the lists contain equal content
+     */
+    private boolean tokenEquals(List<BoundToken> one, List<BoundToken> two) {
+        final int size = one.size();
+        if (size != two.size()) {
+            return false;
+        }
+
+        // the lists are the same length; stop at the first position whose tokens differ
+        int idx = 0;
+        while (idx < size && one.get(idx).equals(two.get(idx))) {
+            idx++;
+        }
+
+        return idx == size;
+    }
+
+    /**
+     * Finds the index in {@code path} just past the literal that begins at {@code patternOff} in {@code pattern}.
+     * @param pattern the pattern that contains the literal
+     * @param path the path that is searched for the literal
+     * @param pathOff the offset into {@code path} to begin searching from
+     * @param patternOff the offset into {@code pattern} where the literal begins
+     * @return the right anchor, or {@code Integer.MAX_VALUE} if the literal is empty or isn't found in {@code path}
+     */
+    int findRightAnchor(CharSequence pattern, CharSequence path, int pathOff, int patternOff) {
+        // The literal runs from patternOff up to the next token in the pattern, or up to the end of the pattern
+        // when no token follows.
+        //
+        // Assumes that patternOff isn't positioned at a token, and that patternOff + 1 isn't a token either
+        final int literalEnd = Math.min(findNextToken(pattern, patternOff), pattern.length());
+        final CharSequence literal = pattern.subSequence(patternOff, literalEnd);
+        final int literalLen = literal.length();
+
+        // an empty literal can't be anchored
+        if (literalLen == 0) {
+            return Integer.MAX_VALUE;
+        }
+
+        // find where the literal starts in the path, looking from pathOff onward
+        final int literalStart = matchNextLiteral(path, pathOff, literal);
+        if (literalStart == Integer.MIN_VALUE) {
+            return Integer.MAX_VALUE;  // the literal isn't in the path
+        }
+
+        return literalStart + literalLen;  // the right anchor sits just past the end of the literal
+    }
+
+    /**
+     * Returns true if <em>any</em> of the tokens in {@code pathTokens} represent a <em>literal</em>.
+     *
+     * @param pathTokens the tokens to check, normally these are the tokens from a single path segment.
+     * @return true if any of the supplied tokens is a literal; false if none is, or if there are no tokens.
+     */
+    boolean containsLiterals(List<BoundToken> pathTokens) {
+        return pathTokens.stream().anyMatch(bt -> bt.token == Token.LITERAL);
+    }
+
+    /**
+     * Returns true if <em>all</em> of the tokens in {@code pathTokens} are <em>literal</em>.
+     *
+     * @param pathTokens the tokens to check; normally these are tokens from a single path segment.
+     * @return true if every supplied token is a literal (which includes the case of no tokens at all)
+     */
+    boolean allLiterals(List<BoundToken> pathTokens) {
+        return pathTokens.stream().allMatch(bt -> bt.token == Token.LITERAL);
+    }
+
+    /**
+     * Returns true if <em>all</em> of the tokens in {@code pathTokens} represent a <em>literal</em> or
+     * <em>path separator</em>.  This method is used to determine if a List of tokens represents a path or a pattern.
+     * <p>
+     * For example, tokens for {@code /foo/bar/baz.txt} would return {@code true}; tokens for {@code Foo??.java} would
+     * return {@code false}, because the '?' characters are not a literal or path separator token.
+     * </p>
+     *
+     * @param pathTokens the tokens to check, normally these are tokens for a path or pattern
+     * @return true if the list of tokens contains only literals or separators
+     */
+    boolean isPath(List<BoundToken> pathTokens) {
+        // allMatch stops at the first offending token, and is vacuously true for an empty list
+        return pathTokens.stream().allMatch(bt -> bt.token == Token.LITERAL || bt.token == Token.PATH_SEPARATOR);
+    }
+
+}
diff --git a/dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/ExpressionMatcherTest.java b/dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/ExpressionMatcherTest.java
new file mode 100644
index 00000000..fe3a0fd7
--- /dev/null
+++ b/dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/ExpressionMatcherTest.java
@@ -0,0 +1,393 @@
+/*
+ *
+ *  * Copyright 2015 Johns Hopkins University
+ *  *
+ *  * Licensed under the Apache License, Version 2.0 (the "License");
+ *  * you may not use this file except in compliance with the License.
+ *  * You may obtain a copy of the License at
+ *  *
+ *  *     http://www.apache.org/licenses/LICENSE-2.0
+ *  *
+ *  * Unless required by applicable law or agreed to in writing, software
+ *  * distributed under the License is distributed on an "AS IS" BASIS,
+ *  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  * See the License for the specific language governing permissions and
+ *  * limitations under the License.
+ *
+ */
+
+package org.dataconservancy.bagit.rules;
+
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+public class ExpressionMatcherTest {
+
+    private ExpressionMatcher underTest;
+
+    @Before
+    public void setUp() throws Exception {
+        this.underTest = new ExpressionMatcher();
+    }
+
+    @Test
+    public void testMatchExpressionWip3() throws Exception {
+        final Expression anyDirFooPlusTwo = new Expression("**/Foo??.java");
+        final Expression fooIT = new Expression("src/test/java/FooIT.java");
+        final Expression fooI = new Expression("src/test/java/FooI.java");
+
+        // sanity: a path matches itself
+        assertTrue(underTest.match(fooIT, fooIT));
+        // 'IT' supplies a character for each of the two '?' tokens
+        assertTrue(underTest.match(anyDirFooPlusTwo, fooIT));
+        // 'I' supplies a character for only one of the two '?' tokens
+        assertFalse(underTest.match(anyDirFooPlusTwo, fooI));
+    }
+
+
+    @Test
+    public void testMatchExpressionWip2() throws Exception {
+        Expression pattern = new Expression("**/*IT.java");
+        Expression path = new Expression("src/test/java/FooIT.java");
+        Expression nonMatchingPath = new Expression("src/test/java/Bar.java");
+
+//        // sanity - ensure that "*IT.java" will match "FooIT.java"
+//        List<BoundToken> tokenPattern = new ArrayList<>();
+//        tokenPattern.add(BoundTokensTestUtil.ZERO_OR_MORE);
+//        tokenPattern.addAll(BoundTokensTestUtil.literalsForString("IT.java"));
+//
+//        List<BoundToken> tokenPath = new ArrayList<>();
+//        tokenPath.addAll(BoundTokensTestUtil.literalsForString("FooIT.java"));
+//        assertTrue(underTest.match(tokenPattern, tokenPath));
+
+        // sanity
+        assertTrue(underTest.match(path, path));
+
+        assertTrue(underTest.match(pattern, path));
+        // the pattern (not the matching path) is what has to reject the non-matching path
+        assertFalse(underTest.match(pattern, nonMatchingPath));
+    }
+
+    @Test
+    public void testMatchExpressionWip() throws Exception {
+        final Expression anyDirFooIT = new Expression("**/FooIT.java");
+        final Expression fooIT = new Expression("src/test/java/FooIT.java");
+        final Expression barIT = new Expression("src/test/java/BarIT.java");
+
+        // sanity: a path matches itself
+        assertTrue(underTest.match(fooIT, fooIT));
+        // the file name is equal to the literal segment of the pattern
+        assertTrue(underTest.match(anyDirFooIT, fooIT));
+        // the file name differs from the literal segment of the pattern
+        assertFalse(underTest.match(anyDirFooIT, barIT));
+    }
+
+    @Test
+    public void testMatchWithOnlyLiterals() throws Exception {
+        final List<BoundToken> bar = BoundTokensTestUtil.literalsForString("bar");
+        final List<BoundToken> sameAsBar = BoundTokensTestUtil.literalsForString("bar");
+
+        // sanity: equal literals match, different literals don't
+        assertTrue(underTest.match(bar, sameAsBar));
+        final List<BoundToken> foo = BoundTokensTestUtil.literalsForString("foo");
+        assertFalse(underTest.match(foo, sameAsBar));
+    }
+
+    @Test
+    public void testNoMatchBeginningZeroPlus() throws Exception {
+        // pattern:  "*IT.java"
+        final List<BoundToken> leadingZeroPlus = new ArrayList<>();
+        leadingZeroPlus.add(BoundTokensTestUtil.ZERO_OR_MORE);
+        leadingZeroPlus.addAll(BoundTokensTestUtil.literalsForString("IT.java"));
+
+        // path: src (the literal 'IT.java' doesn't occur in it)
+        assertFalse(underTest.match(leadingZeroPlus, BoundTokensTestUtil.literalsForString("src")));
+    }
+
+    @Test
+    public void testLiteralsWithExactlyOne() throws Exception {
+        // pattern:  "?tart?IT.jav*"
+
+        final List<BoundToken> tokens = new ArrayList<>();
+        tokens.add(BoundTokensTestUtil.EXACTLY_ONE);
+        tokens.addAll(BoundTokensTestUtil.literalsForString("tart"));
+        tokens.add(BoundTokensTestUtil.EXACTLY_ONE);
+        tokens.addAll(BoundTokensTestUtil.literalsForString("IT.jav"));
+        tokens.add(BoundTokensTestUtil.ZERO_OR_MORE);
+
+        // path: startXIT.java (sanity, should pass)
+        final List<BoundToken> withTrailingChar = BoundTokensTestUtil.literalsForString("startXIT.java");
+        assertTrue(underTest.match(tokens, withTrailingChar));
+
+        // path: startXIT.jav (sanity, should pass)
+        final List<BoundToken> withoutTrailingChar = BoundTokensTestUtil.literalsForString("startXIT.jav");
+        assertTrue(underTest.match(tokens, withoutTrailingChar));
+
+        // path: strtXIT.java (first literal 'tart' doesn't match)
+        final List<BoundToken> badFirstLiteral = BoundTokensTestUtil.literalsForString("strtXIT.java");
+        assertFalse(underTest.match(tokens, badFirstLiteral));
+
+        // path: startXITT.java (middle literal 'IT.jav' doesn't match)
+        final List<BoundToken> badMiddleLiteral = BoundTokensTestUtil.literalsForString("startXITT.java");
+        assertFalse(underTest.match(tokens, badMiddleLiteral));
+    }
+
+    @Test
+    public void testLiteralsWithZeroPlus() throws Exception {
+        // pattern:  "*tart*IT.jav*"
+
+        final List<BoundToken> tokens = new ArrayList<>();
+        tokens.add(BoundTokensTestUtil.ZERO_OR_MORE);
+        tokens.addAll(BoundTokensTestUtil.literalsForString("tart"));
+        tokens.add(BoundTokensTestUtil.ZERO_OR_MORE);
+        tokens.addAll(BoundTokensTestUtil.literalsForString("IT.jav"));
+        tokens.add(BoundTokensTestUtil.ZERO_OR_MORE);
+
+        // path: startXIT.java (sanity, should pass)
+        final List<BoundToken> leadAndTrail = BoundTokensTestUtil.literalsForString("startXIT.java");
+        assertTrue(underTest.match(tokens, leadAndTrail));
+
+        // path: startXIT.jav (sanity, should pass)
+        final List<BoundToken> leadOnly = BoundTokensTestUtil.literalsForString("startXIT.jav");
+        assertTrue(underTest.match(tokens, leadOnly));
+
+        // path: tartXIT.java (sanity, should pass)
+        final List<BoundToken> trailOnly = BoundTokensTestUtil.literalsForString("tartXIT.java");
+        assertTrue(underTest.match(tokens, trailOnly));
+
+        // path: tartXIT.jav (sanity, should pass)
+        final List<BoundToken> neitherLeadNorTrail = BoundTokensTestUtil.literalsForString("tartXIT.jav");
+        assertTrue(underTest.match(tokens, neitherLeadNorTrail));
+
+        // path: strtXIT.java (first literal 'tart' doesn't match)
+        final List<BoundToken> badFirstLiteral = BoundTokensTestUtil.literalsForString("strtXIT.java");
+        assertFalse(underTest.match(tokens, badFirstLiteral));
+
+        // path: startXITT.java (middle literal 'IT.jav' doesn't match)
+        final List<BoundToken> badMiddleLiteral = BoundTokensTestUtil.literalsForString("startXITT.java");
+        assertFalse(underTest.match(tokens, badMiddleLiteral));
+    }
+
+
+    @Test
+    public void testMultipleSingleCharacterTokens() throws Exception {
+        // pattern:  "?tart?IT.jav?"
+
+        List<BoundToken> pattern = new ArrayList<>();
+        pattern.add(BoundTokensTestUtil.EXACTLY_ONE);
+        pattern.addAll(BoundTokensTestUtil.literalsForString("tart"));
+        pattern.add(BoundTokensTestUtil.EXACTLY_ONE);
+        pattern.addAll(BoundTokensTestUtil.literalsForString("IT.jav"));
+        pattern.add(BoundTokensTestUtil.EXACTLY_ONE);
+
+        // path: startXIT.java (sanity, should pass)
+        List<BoundToken> path = BoundTokensTestUtil.literalsForString("startXIT.java");
+        assertTrue(underTest.match(pattern, path));
+
+        // path: FootartXIT.java (too many characters for first token)
+        path = BoundTokensTestUtil.literalsForString("FootartXIT.java");
+        assertFalse(underTest.match(pattern, path));
+
+        // path: tartXIT.java (no characters for first token)
+        path = BoundTokensTestUtil.literalsForString("tartXIT.java");
+        assertFalse(underTest.match(pattern, path));
+
+        // path: StartItUpIT.java (too many characters for middle token)
+        path = BoundTokensTestUtil.literalsForString("StartItUpIT.java");
+        assertFalse(underTest.match(pattern, path));
+
+        // path: StartIT.java (no characters for middle token)
+        path = BoundTokensTestUtil.literalsForString("StartIT.java");
+        assertFalse(underTest.match(pattern, path));
+
+        // path: StartXIT.jav (no characters for last token)
+        path = BoundTokensTestUtil.literalsForString("StartXIT.jav");
+        assertFalse(underTest.match(pattern, path));
+
+        // path: StartXIT.javaa (too many characters for last token)
+        path = BoundTokensTestUtil.literalsForString("StartXIT.javaa");
+        assertFalse(underTest.match(pattern, path));
+    }
+
+    @Test
+    public void testMatchLiteralFirstExactlyOneNoMatch() throws Exception {
+        // pattern:  "Start?IT.java"
+
+        List<BoundToken> pattern = new ArrayList<>();
+        pattern.addAll(BoundTokensTestUtil.literalsForString("Start"));
+        pattern.add(BoundTokensTestUtil.EXACTLY_ONE);
+        pattern.addAll(BoundTokensTestUtil.literalsForString("IT.java"));
+
+        // path: startXIT.java (sanity, should pass)
+        List<BoundToken> path = BoundTokensTestUtil.literalsForString("StartXIT.java");
+        assertTrue(underTest.match(pattern, path));
+
+        // path: StartFooIT.java (won't match)
+        path = BoundTokensTestUtil.literalsForString("StartFooIT.java");
+
+        assertFalse(underTest.match(pattern, path));
+    }
+
+    @Test
+    public void testMatchLiteralFirstExactlyOne() throws Exception {
+        // pattern:  "Start?IT.java"
+
+        List<BoundToken> pattern = new ArrayList<>();
+        pattern.addAll(BoundTokensTestUtil.literalsForString("Start"));
+        pattern.add(BoundTokensTestUtil.EXACTLY_ONE);
+        pattern.addAll(BoundTokensTestUtil.literalsForString("IT.java"));
+
+        // path: StartXIT.java
+        List<BoundToken> path = BoundTokensTestUtil.literalsForString("StartXIT.java");
+
+        assertTrue(underTest.match(pattern, path));
+    }
+
+    @Test
+    public void testMatchLiteralFirstZeroPlus() throws Exception {
+        // pattern:  "Start*IT.java"
+
+        List<BoundToken> pattern = new ArrayList<>();
+        pattern.addAll(BoundTokensTestUtil.literalsForString("Start"));
+        pattern.add(BoundTokensTestUtil.ZERO_OR_MORE);
+        pattern.addAll(BoundTokensTestUtil.literalsForString("IT.java"));
+
+        // path: StartCarIT.java
+        List<BoundToken> path = BoundTokensTestUtil.literalsForString("StartCarIT.java");
+
+        assertTrue(underTest.match(pattern, path));
+    }
+
+    @Test
+    public void testMatchTokenFirst() throws Exception {
+        // pattern:  "*File*IT.java"
+
+        List<BoundToken> pattern = new ArrayList<>();
+        pattern.add(BoundTokensTestUtil.ZERO_OR_MORE);
+        pattern.addAll(BoundTokensTestUtil.literalsForString("File"));
+        pattern.add(BoundTokensTestUtil.ZERO_OR_MORE);
+        pattern.addAll(BoundTokensTestUtil.literalsForString("IT.java"));
+
+        // path: UnixFileSmallIT.java
+        List<BoundToken> path = BoundTokensTestUtil.literalsForString("UnixFileSmallIT.java");
+
+        assertTrue(underTest.match(pattern, path));
+    }
+
+    @Test
+    public void testMatchConsecutiveMatchTokens() throws Exception {
+        // pattern: "Foo??.java"
+        List<BoundToken> pattern = new ArrayList<>();
+        pattern.addAll(BoundTokensTestUtil.literalsForString("Foo"));
+        pattern.add(BoundTokensTestUtil.EXACTLY_ONE);
+        pattern.add(BoundTokensTestUtil.EXACTLY_ONE);
+        pattern.addAll(BoundTokensTestUtil.literalsForString(".java"));
+
+        // path: FooIT.java
+        List<BoundToken> path = BoundTokensTestUtil.literalsForString("FooIT.java");
+
+        assertTrue(underTest.match(pattern, path));
+    }
+
+    @Test
+    public void testFindNextToken() throws Exception {
+        assertEquals(19, underTest.findNextToken("src/test/resources/*IT.java", 0));
+        assertEquals(0, underTest.findNextToken("*File*IT.java", 0));
+        assertEquals(5, underTest.findNextToken("*File*IT.java", 1));
+    }
+
+    @Test
+    public void testFindNextLiteral() throws Exception {
+        assertEquals(0, underTest.findNextLiteral("src/test/resources/*IT.java", 0));
+        assertEquals(1, underTest.findNextLiteral("*File*IT.java", 0));
+        assertEquals(2, underTest.findNextLiteral("*File*IT.java", 2));
+        assertEquals(5, underTest.findNextLiteral("Foo??.java", 3));
+    }
+
+    @Test
+    public void testFindNextLiteralString() throws Exception {
+        assertEquals(1, underTest.matchNextLiteral("*File*IT.java", 0, "File"));
+        assertEquals(6, underTest.matchNextLiteral("*File*IT.java", 0, "IT"));
+        assertEquals(6, underTest.matchNextLiteral("*File*IT.java", 0, "IT.java"));
+        assertEquals(6, underTest.matchNextLiteral("*File*IT.java", 4, "IT.java"));
+        assertEquals(Integer.MIN_VALUE, underTest.matchNextLiteral("*FileIT.java", 0, "doodle"));
+    }
+
+    @Test
+    public void testFindRightAnchorFromBeginning() throws Exception {
+        String pattern = "File*IT.java";
+        String path = "FileUnixIT.java";
+
+        assertEquals("File".length(), underTest.findRightAnchor(pattern, path, 0,0 ));
+    }
+
+    @Test
+    public void testFindRightAnchorFromMiddle() throws Exception {
+        String pattern = "File*IT.java";
+        String path = "FileUnixIT.java";
+
+        // find the right anchor after we've matched pattern "File*" to path "FileUnix"
+        assertEquals(path.length(), underTest.findRightAnchor(pattern, path, "FileUnix".length(), "File*".length()));
+    }
+
+    @Test
+    public void testFindRightAnchorMultipleTokens() throws Exception {
+        String pattern = "Foo*Bar*Baz";
+        String path = "FooXBarYBaz";
+
+        assertEquals(3, underTest.findRightAnchor(pattern, path, 0, 0));
+        assertEquals(7, underTest.findRightAnchor(pattern, path, "FooX".length(), "Foo*".length()));
+        assertEquals(11, underTest.findRightAnchor(pattern, path, "FooXBarY".length(), "Foo*Bar*".length()));
+    }
+
+    @Test
+    public void testFindRightAnchorFoo() throws Exception {
+        String pattern = "Foo??.java";
+        String path = "src";
+
+        // behavior when the path is not in the pattern
+        assertEquals(Integer.MAX_VALUE, underTest.findRightAnchor(pattern, path, 0, 0));
+    }
+
+    @Test
+    public void testFindRightAnchorFooIT() throws Exception {
+        String pattern = "Foo??.java";
+        String path = "FooIT.java";
+
+        // behavior when the pattern offset is positioned at a token
+        assertEquals(Integer.MAX_VALUE, underTest.findRightAnchor(pattern, path, 0, 3));
+    }
+
+    @Test
+    public void testRightAnchorBar() throws Exception {
+        String pattern = "*File*IT.java";
+        String path = "UnixFileSmallIT.java";
+
+        // behavior when the path offset is already positioned at the right anchor
+        assertEquals(8, underTest.findRightAnchor(pattern, path, 4, 1));
+    }
+
+    void assertListsEqual(List<String> expected, List<String> actual) {
+        assertExpectedCount(expected.size(), actual);
+
+        for (int i = 0; i < expected.size(); i++) {
+            assertEquals("Expected path segments to be equal.  Expected: '" + expected.get(i) +
+                    "', Actual: '" + actual.get(i) + "'", expected.get(i), actual.get(i));
+        }
+    }
+
+    void assertExpectedCount(int expectedCount, List<String> actual) {
+        assertEquals("Expected List to contain " + expectedCount + " elements.  Contained " + actual.size() + ": " +
+                        actual.stream().map(v -> "'" + v + "'").collect(Collectors.joining(", ")),
+                expectedCount, actual.size());
+    }
+}
\ No newline at end of file
diff --git a/dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/ExpressionTest.java b/dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/ExpressionTest.java
new file mode 100644
index 00000000..a65b5e8f
--- /dev/null
+++ b/dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/ExpressionTest.java
@@ -0,0 +1,101 @@
+/*
+ *
+ *  * Copyright 2015 Johns Hopkins University
+ *  *
+ *  * Licensed under the Apache License, Version 2.0 (the "License");
+ *  * you may not use this file except in compliance with the License.
+ *  * You may obtain a copy of the License at
+ *  *
+ *  *     http://www.apache.org/licenses/LICENSE-2.0
+ *  *
+ *  * Unless required by applicable law or agreed to in writing, software
+ *  * distributed under the License is distributed on an "AS IS" BASIS,
+ *  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  * See the License for the specific language governing permissions and
+ *  * limitations under the License.
+ *
+ */
+
+package org.dataconservancy.bagit.rules;
+
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.dataconservancy.bagit.rules.BoundTokensTestUtil.PATH_SEP;
+import static org.dataconservancy.bagit.rules.BoundTokensTestUtil.PATH_SEP_L;
+import static org.dataconservancy.bagit.rules.BoundTokensTestUtil.ZERO_OR_MORE;
+import static org.dataconservancy.bagit.rules.BoundTokensTestUtil.assertTokenListEquals;
+import static org.dataconservancy.bagit.rules.BoundTokensTestUtil.literalsForString;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+public class ExpressionTest {
+
+    @Test
+    public void testSimple() throws Exception {
+        Expression exp = new Expression("src/test/resources/*IT.java");
+
+        // == src/test/resources/*IT.java
+        List<BoundToken> expected = literalsForString("src");
+        expected.add(PATH_SEP);
+        expected.addAll(literalsForString("test"));
+        expected.add(PATH_SEP);
+        expected.addAll(literalsForString("resources"));
+        expected.add(PATH_SEP);
+        expected.add(ZERO_OR_MORE);
+        expected.addAll(literalsForString("IT.java"));
+
+        List<BoundToken> actual = exp.getTokens();
+
+        assertTokenListEquals(expected, actual);
+
+        // depth is an index
+        assertEquals(3, exp.depth());
+
+        // get path segment by depth test
+        assertTokenListEquals(literalsForString("src"), exp.getPathSegment(0));
+        assertTokenListEquals(literalsForString("test"), exp.getPathSegment(1));
+        assertTokenListEquals(literalsForString("resources"), exp.getPathSegment(2));
+
+        expected = new ArrayList<>();
+        expected.add(ZERO_OR_MORE);
+        expected.addAll(literalsForString("IT.java"));
+        assertTokenListEquals(expected, exp.getPathSegment(3));
+
+        // out of bounds tests
+        assertTrue(exp.getPathSegment(exp.depth() + 5).isEmpty());
+        assertTrue(exp.getPathSegment(-1).isEmpty());
+    }
+
+    @Test
+    public void testWithEmptyRoot() throws Exception {
+        Expression exp = new Expression("/");
+        assertEquals(0, exp.depth());
+
+        // TODO decide what to do with the automatic addition of '**'
+        // for example, the Expression "/" is tokenized as "/**".
+        // any path ending in "/" is going to be tokenized with a trailing "**",
+        // and the user may not intend that behavior (for example, if they just want
+        // to express a path, not a pattern).
+        assertTokenListEquals(PATH_SEP_L, exp.getTokens());
+        assertTrue(exp.getPathSegment(0).isEmpty());
+    }
+
+    @Test
+    public void testWithSingleFileRoot() throws Exception {
+        Expression exp = new Expression("/foo.txt");
+        assertEquals(0, exp.depth());
+
+        // expected tokens for "/foo.txt": a path separator followed by the literals of "foo.txt"
+        List<BoundToken> expected = new ArrayList<>();
+        expected.add(PATH_SEP);
+        expected.addAll(literalsForString("foo.txt"));
+
+        assertTokenListEquals(expected, exp.getTokens());
+        assertFalse(exp.getPathSegment(0).isEmpty());
+        assertTokenListEquals(literalsForString("foo.txt"), exp.getPathSegment(0));
+    }
+}
\ No newline at end of file

From 400dbe9f57d498d2d403f6d0c468fa30fd0e5f48 Mon Sep 17 00:00:00 2001
From: Elliot Metsger <emetsger@jhu.edu>
Date: Tue, 15 Sep 2015 10:22:12 -0400
Subject: [PATCH 8/9] DC-2101:  Fix an error with the string representation of
 the Expression: mistakenly used BoundToken.toString() instead of
 intentionally composing a string representation of the BoundToken.

---
 .../org/dataconservancy/bagit/rules/ExpressionMatcher.java     | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/ExpressionMatcher.java b/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/ExpressionMatcher.java
index 6aea666f..c3ff89aa 100644
--- a/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/ExpressionMatcher.java
+++ b/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/ExpressionMatcher.java
@@ -606,7 +606,8 @@ private boolean isExactlyOne(List<BoundToken> pathSegment) {
      * @return the sequence of token values
      */
     private CharSequence toCharSeq(List<BoundToken> tokens) {
-        return tokens.stream().collect(StringBuilder::new, StringBuilder::append, StringBuilder::append);
+        return tokens.stream().collect(StringBuilder::new, (sb, bt) -> sb.append(bt.bound),
+                StringBuilder::append).toString();
     }
 
     /**

From 3fa605b39123fa74b63ee65f014bfca307922d9c Mon Sep 17 00:00:00 2001
From: Elliot Metsger <emetsger@jhu.edu>
Date: Tue, 15 Sep 2015 16:25:49 -0400
Subject: [PATCH 9/9] DC-2101: new method
 isDirectoryMatchToken(patternPathSegment) guards match(CharSequence,
 CharSequence, int, int, int, int) from having to handle '**' tokens. 
 Includes test and Javadoc updates.

---
 .../bagit/rules/ExpressionMatcher.java        |  43 ++-
 .../bagit/rules/ExpressionMatcherTest.java    | 306 ++++++++++++------
 2 files changed, 243 insertions(+), 106 deletions(-)

diff --git a/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/ExpressionMatcher.java b/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/ExpressionMatcher.java
index c3ff89aa..4bb0f3ab 100644
--- a/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/ExpressionMatcher.java
+++ b/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/ExpressionMatcher.java
@@ -71,6 +71,13 @@
  * <p>
  * Note that methods on this class are package-private, and are not meant to be exposed publicly.
  * </p>
+ * <p>
+ * Because the methods of this class are package-private, it can be hard to tell what the entry points into the
+ * ExpressionMatcher class are. Clients of ExpressionMatcher should be calling
+ * <ul>
+ *   <li>{@link org.dataconservancy.bagit.rules.ExpressionMatcher#match(Expression, Expression)}</li>
+ * </ul>
+ * </p>
  */
 public class ExpressionMatcher {
 
@@ -185,7 +192,7 @@ private boolean matchPathSegment(Expression pattern, Expression path, int patter
             return match;
         }
 
-        // match the pattern segment containing literals against every path segment until we get a match
+        // attempt to match every path segment against the pattern segment containing literals.
         int rightAnchor = nextMatch(path, pathDepth, pattern.getPathSegment(nextLiteral));
 
         // if we don't match ...
@@ -193,7 +200,7 @@ private boolean matchPathSegment(Expression pattern, Expression path, int patter
             return false;
         }
 
-        // make sure that every path segment from the left anchor to the right anchor matches the current path expression
+        // make sure that every path segment from the left anchor to the right anchor matches the current pattern
         boolean match = true;
         for (int i = pathDepth; i < rightAnchor; i++) {
             match &= match(pattern.getPathSegment(patternDepth), path.getPathSegment(i));
@@ -270,6 +277,7 @@ boolean match(List<BoundToken> patternPathSegment, List<BoundToken> pathPathSegm
 
         // first, handle the short-circuit cases:
         //  patternPathSegment only contains '*' ; doesn't matter what pathPathSegment has, all tokens match
+        //  patternPathSegment only contains '**' ; doesn't matter what pathPathSegment has, all tokens match
         //  patternPathSegment contains '?' and pathPathSegment only has a single token, the single token matches
         //  patternPathSegment is all literals ; see if the pathPathSegment equals
 
@@ -277,6 +285,11 @@ boolean match(List<BoundToken> patternPathSegment, List<BoundToken> pathPathSegm
             return true;
         }
 
+        if (isDirectoryMatchToken(patternPathSegment)) {
+            // this guards match(CharSequence, CharSequence, int, int, int, int) from having to handle '**' tokens.
+            return true;
+        }
+
         if (pathPathSegment.size() == 1 && isExactlyOne(patternPathSegment)) {
             return true;
         }
@@ -304,6 +317,9 @@ boolean match(List<BoundToken> patternPathSegment, List<BoundToken> pathPathSegm
     /**
      * A recursive method for matching a {@code path} against a {@code pattern}.  The method terminates when there are
      * no more literals or tokens to be matched, or as soon as it determines a match isn't possible and returns early.
+     * <strong>N.B.</strong> this method cannot handle a directory matching token: '**'.  It is expected that the caller
+     * has filtered these tokens out (see {@link #match(java.util.List, java.util.List)} and its
+     * {@link #isDirectoryMatchToken(java.util.List)} check).
      * <p>
      * Developers, when reading this implementation, keep in mind that anchors are always indexes into the {@code path},
      * while {@code tokenIndex} and {@code literalIndex} are always indexes into {@code pattern}. The first major
@@ -579,13 +595,28 @@ int matchNextLiteral(CharSequence path, int offset, CharSequence literal) {
     }
 
     /**
-     * Returns true if every token in the path segment is a {@link Token#ZERO_OR_MORE_CHARACTERS}.
+     * Returns true if the path segment contains a single {@link Token#ZERO_OR_MORE_CHARACTERS} token.
      *
      * @param pathSegment the path segment containing arbitrary tokens
-     * @return true if every token in the path segment is a {@code ZERO_OR_MORE_CHARACTERS} token.
+     * @return true if the only token in the path segment is a {@code ZERO_OR_MORE_CHARACTERS} token.
      */
-    private boolean isZeroOrMore(List<BoundToken> pathSegment) {
+    boolean isZeroOrMore(List<BoundToken> pathSegment) {
         return pathSegment.size() == 1 && pathSegment.get(0).token == Token.ZERO_OR_MORE_CHARACTERS;
+
+
+    }
+
+    /**
+     * Returns true if the path segment contains a single {@link Token#DIRECTORY} token, or exactly two
+     * {@link Token#ZERO_OR_MORE_CHARACTERS} tokens.
+     *
+     * @param pathSegment the path segment containing arbitrary tokens
+     * @return true if the path segment will match a directory
+     */
+    boolean isDirectoryMatchToken(List<BoundToken> pathSegment) {
+        return (pathSegment.size() == 1 && pathSegment.get(0).token == Token.DIRECTORY) ||
+                (pathSegment.size() == 2 && pathSegment.get(0).token == Token.ZERO_OR_MORE_CHARACTERS
+                        && pathSegment.get(1).token == Token.ZERO_OR_MORE_CHARACTERS);
     }
 
     /**
@@ -594,7 +625,7 @@ private boolean isZeroOrMore(List<BoundToken> pathSegment) {
      * @param pathSegment the path segment containing arbitrary tokens
      * @return true if the single token in the path segment is a {@code EXACTLY_ONE_CHARACTER} token.
      */
-    private boolean isExactlyOne(List<BoundToken> pathSegment) {
+    boolean isExactlyOne(List<BoundToken> pathSegment) {
         return pathSegment.size() == 1 && pathSegment.get(0).token == Token.EXACTLY_ONE_CHARACTER;
     }
 
diff --git a/dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/ExpressionMatcherTest.java b/dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/ExpressionMatcherTest.java
index fe3a0fd7..152c31b9 100644
--- a/dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/ExpressionMatcherTest.java
+++ b/dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/ExpressionMatcherTest.java
@@ -26,10 +26,30 @@
 import java.util.List;
 import java.util.stream.Collectors;
 
+import static org.dataconservancy.bagit.rules.BoundTokensTestUtil.DIR_L;
+import static org.dataconservancy.bagit.rules.BoundTokensTestUtil.EXACTLY_ONE;
+import static org.dataconservancy.bagit.rules.BoundTokensTestUtil.ZERO_OR_MORE;
+import static org.dataconservancy.bagit.rules.BoundTokensTestUtil.assertTokenListEquals;
+import static org.dataconservancy.bagit.rules.BoundTokensTestUtil.literalsForString;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
+/**
+ * Many, many tests against various methods in the ExpressionMatcher class.
+ * Most of the test methods in this class contain multiple assertions.  Normally there will be one assertion for a
+ * sanity check - an assertion that should always be true.  Often there will be multiple sanity checks.
+ * <p>
+ * Because ExpressionMatcher is package-private, it can be hard to tell what the entry points into the ExpressionMatcher
+ * class are, and this test class doesn't help you determine that. Clients of ExpressionMatcher should be calling
+ * either:
+ * <ul>
+ *   <li>{@link org.dataconservancy.bagit.rules.ExpressionMatcher#match(Expression, Expression)}</li>
+ *   <li>{@link org.dataconservancy.bagit.rules.ExpressionMatcher#match(java.util.List, java.util.List)}</li>
+ * </ul>
+ * This test class covers not only these entry point methods, but other utility methods as well.
+ * </p>
+ */
 public class ExpressionMatcherTest {
 
     private ExpressionMatcher underTest;
@@ -39,231 +59,263 @@ public void setUp() throws Exception {
         underTest = new ExpressionMatcher();
     }
 
+    /**
+     * Attempts a match using an Expression that starts with '**' and contains consecutive '?' matching tokens.
+     */
     @Test
-    public void testMatchExpressionWip3() throws Exception {
+    public void testMatchExpressionLeadingDirectoryAndConsecutiveExactlyOne() throws Exception {
+        // The pattern to match against: leading '**' and consecutive '??'
         Expression pattern = new Expression("**/Foo??.java");
+
+        // This path should match the pattern: src/test/java matches '**' and FooIT.java matches 'Foo??.java'
         Expression path = new Expression("src/test/java/FooIT.java");
+
+        // This path should not match (the consecutive token '??' will remain unmatched)
         Expression nonMatchingPath = new Expression("src/test/java/FooI.java");
 
-        // sanity
+        // sanity: a path should match itself.
         assertTrue(underTest.match(path, path));
 
         assertTrue(underTest.match(pattern, path));
-
         assertFalse(underTest.match(pattern, nonMatchingPath));
     }
 
-
+    /**
+     * Attempts a match using an Expression that starts with '**' and contains a '*' matching token.
+     */
     @Test
-    public void testMatchExpressionWip2() throws Exception {
+    public void testMatchExpressionLeadingDirectoryAndZeroPlus() throws Exception {
+        // The pattern to match against: leading '**' and a '*'
         Expression pattern = new Expression("**/*IT.java");
+
+        // This path should match the pattern: src/test/java matches '**' and FooIT.java matches '*IT.java'
         Expression path = new Expression("src/test/java/FooIT.java");
-        Expression nonMatchingPath = new Expression("src/test/java/Bar.java");
 
-//        // sanity - insure that "*IT.java" will match "FooIT.java"
-//        List<BoundToken> tokenPattern = new ArrayList<>();
-//        tokenPattern.add(BoundTokensTestUtil.ZERO_OR_MORE);
-//        tokenPattern.addAll(BoundTokensTestUtil.literalsForString("IT.java"));
-//
-//        List<BoundToken> tokenPath = new ArrayList<>();
-//        tokenPath.addAll(BoundTokensTestUtil.literalsForString("FooIT.java"));
-//        assertTrue(underTest.match(tokenPattern, tokenPath));
+        // This path should not match (the path segment Bar.java will remain unmatched)
+        Expression nonMatchingPath = new Expression("src/test/java/Bar.java");
 
-        // sanity
+        // sanity: a path should match itself
         assertTrue(underTest.match(path, path));
 
         assertTrue(underTest.match(pattern, path));
-
-        assertFalse(underTest.match(nonMatchingPath, path));
+        assertFalse(underTest.match(pattern, nonMatchingPath));
     }
 
+    /**
+     * Attempts a match using an Expression that starts with a '**' matching token.
+     */
     @Test
-    public void testMatchExpressionWip() throws Exception {
+    public void testMatchExpressionLeadingDirectory() throws Exception {
+        // The pattern to match against: leading '**'
         Expression pattern = new Expression("**/FooIT.java");
+
+        // This path should match the pattern: src/test/java matches '**', and FooIT.java matches the 'FooIT.java' literal
         Expression path = new Expression("src/test/java/FooIT.java");
+
+        // This path should not match
         Expression nonMatchingPath = new Expression("src/test/java/BarIT.java");
 
-        // sanity
+        // sanity: a path should match itself
         assertTrue(underTest.match(path, path));
 
         assertTrue(underTest.match(pattern, path));
-
         assertFalse(underTest.match(pattern, nonMatchingPath));
     }
 
+    /**
+     * Attempts a match using two equal {@code List<BoundToken>} instances containing only literals (no matching
+     * tokens or path separators).
+     */
     @Test
     public void testMatchWithOnlyLiterals() throws Exception {
-        List<BoundToken> pattern = BoundTokensTestUtil.literalsForString("bar");
-        List<BoundToken> path = BoundTokensTestUtil.literalsForString("bar");
+        List<BoundToken> pattern = literalsForString("bar");
+        List<BoundToken> path = literalsForString("bar");
+        assertTokenListEquals(path, pattern);
 
-        // sanity
-        assertTrue(underTest.match(pattern, path));
+        // sanity: non-equal literal token lists should not match
+        assertFalse(underTest.match(literalsForString("foo"), path));
 
-        assertFalse(underTest.match(BoundTokensTestUtil.literalsForString("foo"), path));
+        // test to make sure that equal literal token lists will match
+        assertTrue(underTest.match(pattern, path));
     }
 
+    /**
+     * Verifies that a literal will not match a pattern that contains a leading '*' match token followed by
+     * a non-matching literal.  A complicated way of saying that we verify that the pattern "*IT.java" won't match
+     * "src".
+     */
     @Test
     public void testNoMatchBeginningZeroPlus() throws Exception {
+        // pattern: *IT.java
         List<BoundToken> pattern = new ArrayList<>();
-        pattern.add(BoundTokensTestUtil.ZERO_OR_MORE);
-        pattern.addAll(BoundTokensTestUtil.literalsForString("IT.java"));
+        pattern.add(ZERO_OR_MORE);
+        pattern.addAll(literalsForString("IT.java"));
 
-        List<BoundToken> path = BoundTokensTestUtil.literalsForString("src");
+        // path: src
+        List<BoundToken> path = literalsForString("src");
 
         assertFalse(underTest.match(pattern, path));
     }
 
+    /**
+     * Attempts to match a {@code List<BoundToken>} pattern that leads with a '?' matching token, ends with a '?'
+     * matching token, and has a single '?' token in the middle.
+     */
     @Test
     public void testLiteralsWithExactlyOne() throws Exception {
-        // pattern:  "?tart?IT.jav*"
-
+        // pattern:  "?tart?IT.jav?"
         List<BoundToken> pattern = new ArrayList<>();
-        pattern.add(BoundTokensTestUtil.EXACTLY_ONE);
-        pattern.addAll(BoundTokensTestUtil.literalsForString("tart"));
-        pattern.add(BoundTokensTestUtil.EXACTLY_ONE);
-        pattern.addAll(BoundTokensTestUtil.literalsForString("IT.jav"));
-        pattern.add(BoundTokensTestUtil.ZERO_OR_MORE);
+        pattern.add(EXACTLY_ONE);
+        pattern.addAll(literalsForString("tart"));
+        pattern.add(EXACTLY_ONE);
+        pattern.addAll(literalsForString("IT.jav"));
+        pattern.add(EXACTLY_ONE);
 
         // path: startXIT.java (sanity, should pass)
-        List<BoundToken> path = BoundTokensTestUtil.literalsForString("startXIT.java");
-        assertTrue(underTest.match(pattern, path));
-
-        // path: startXIT.jav (sanity, should pass)
-        path = BoundTokensTestUtil.literalsForString("startXIT.jav");
+        List<BoundToken> path = literalsForString("startXIT.java");
         assertTrue(underTest.match(pattern, path));
 
         // path: strtXIT.java (first literal 'tart' doesn't match)
-        path = BoundTokensTestUtil.literalsForString("strtXIT.java");
+        path = literalsForString("strtXIT.java");
         assertFalse(underTest.match(pattern, path));
 
         // path: startXITT.java (middle literal 'IT.jav' doesn't match)
-        path = BoundTokensTestUtil.literalsForString("startXITT.java");
+        path = literalsForString("startXITT.java");
+        assertFalse(underTest.match(pattern, path));
+
+        // path: startXIT.jav (last token '?' doesn't match - missing character in path)
+        path = literalsForString("startXIT.jav");
+        assertFalse(underTest.match(pattern, path));
+
+        // path: startXIT.javaa (last literal 'a' in path doesn't match)
+        path = literalsForString("startXIT.javaa");
         assertFalse(underTest.match(pattern, path));
     }
 
+    /**
+     * Attempts to match a {@code List<BoundToken>} pattern that leads with a '*' matching token, ends with a '*'
+     * matching token, and has a single '*' token in the middle.
+     */
     @Test
     public void testLiteralsWithZeroPlus() throws Exception {
         // pattern:  "*tart*IT.jav*"
 
         List<BoundToken> pattern = new ArrayList<>();
-        pattern.add(BoundTokensTestUtil.ZERO_OR_MORE);
-        pattern.addAll(BoundTokensTestUtil.literalsForString("tart"));
-        pattern.add(BoundTokensTestUtil.ZERO_OR_MORE);
-        pattern.addAll(BoundTokensTestUtil.literalsForString("IT.jav"));
-        pattern.add(BoundTokensTestUtil.ZERO_OR_MORE);
+        pattern.add(ZERO_OR_MORE);
+        pattern.addAll(literalsForString("tart"));
+        pattern.add(ZERO_OR_MORE);
+        pattern.addAll(literalsForString("IT.jav"));
+        pattern.add(ZERO_OR_MORE);
 
         // path: startXIT.java (sanity, should pass)
-        List<BoundToken> path = BoundTokensTestUtil.literalsForString("startXIT.java");
+        List<BoundToken> path = literalsForString("startXIT.java");
         assertTrue(underTest.match(pattern, path));
 
         // path: startXIT.jav (sanity, should pass)
-        path = BoundTokensTestUtil.literalsForString("startXIT.jav");
+        path = literalsForString("startXIT.jav");
         assertTrue(underTest.match(pattern, path));
 
         // path: tartXIT.java (sanity, should pass)
-        path = BoundTokensTestUtil.literalsForString("tartXIT.java");
+        path = literalsForString("tartXIT.java");
         assertTrue(underTest.match(pattern, path));
 
         // path: tartXIT.jav (sanity, should pass)
-        path = BoundTokensTestUtil.literalsForString("tartXIT.jav");
+        path = literalsForString("tartXIT.jav");
         assertTrue(underTest.match(pattern, path));
 
         // path: strtXIT.java (first literal 'tart' doesn't match)
-        path = BoundTokensTestUtil.literalsForString("strtXIT.java");
+        path = literalsForString("strtXIT.java");
         assertFalse(underTest.match(pattern, path));
 
         // path: startXITT.java (middle literal 'IT.jav' doesn't match)
-        path = BoundTokensTestUtil.literalsForString("startXITT.java");
+        path = literalsForString("startXITT.java");
         assertFalse(underTest.match(pattern, path));
     }
 
-
+    /**
+     * Attempts various path matches against a pattern that contains three matching '?' tokens, at the
+     * beginning, middle, and end of the pattern.
+     */
     @Test
     public void testMultipleSingleCharacterTokens() throws Exception {
         // pattern:  "?tart?IT.jav?"
 
         List<BoundToken> pattern = new ArrayList<>();
-        pattern.add(BoundTokensTestUtil.EXACTLY_ONE);
-        pattern.addAll(BoundTokensTestUtil.literalsForString("tart"));
-        pattern.add(BoundTokensTestUtil.EXACTLY_ONE);
-        pattern.addAll(BoundTokensTestUtil.literalsForString("IT.jav"));
-        pattern.add(BoundTokensTestUtil.EXACTLY_ONE);
+        pattern.add(EXACTLY_ONE);
+        pattern.addAll(literalsForString("tart"));
+        pattern.add(EXACTLY_ONE);
+        pattern.addAll(literalsForString("IT.jav"));
+        pattern.add(EXACTLY_ONE);
 
         // path: startXIT.java (sanity, should pass)
-        List<BoundToken> path = BoundTokensTestUtil.literalsForString("startXIT.java");
+        List<BoundToken> path = literalsForString("startXIT.java");
         assertTrue(underTest.match(pattern, path));
 
         // path: FootartXIT.java (too many characters for first token)
-        path = BoundTokensTestUtil.literalsForString("FootartXIT.java");
+        path = literalsForString("FootartXIT.java");
         assertFalse(underTest.match(pattern, path));
 
         // path: tartXIT.java (no characters for first token)
-        path = BoundTokensTestUtil.literalsForString("tartXIT.java");
+        path = literalsForString("tartXIT.java");
         assertFalse(underTest.match(pattern, path));
 
         // path: StartItUpIT.java (too many characters for middle token)
-        path = BoundTokensTestUtil.literalsForString("StartItUpIT.java");
+        path = literalsForString("StartItUpIT.java");
         assertFalse(underTest.match(pattern, path));
 
         // path: StartIT.java (no characters for middle token)
-        path = BoundTokensTestUtil.literalsForString("StartIT.java");
+        path = literalsForString("StartIT.java");
         assertFalse(underTest.match(pattern, path));
 
         // path: StartXIT.jav (no characters for last token)
-        path = BoundTokensTestUtil.literalsForString("StartXIT.jav");
+        path = literalsForString("StartXIT.jav");
         assertFalse(underTest.match(pattern, path));
 
         // path: StartXIT.javaa (too many characters for last token)
-        path = BoundTokensTestUtil.literalsForString("StartXIT.javaa");
+        path = literalsForString("StartXIT.javaa");
         assertFalse(underTest.match(pattern, path));
     }
 
+    /**
+     * Attempts to match a path against a pattern containing a single matching token '?' in the middle.
+     */
     @Test
     public void testMatchLiteralFirstExactlyOneNoMatch() throws Exception {
         // pattern:  "Start?IT.java"
 
         List<BoundToken> pattern = new ArrayList<>();
-        pattern.addAll(BoundTokensTestUtil.literalsForString("Start"));
-        pattern.add(BoundTokensTestUtil.EXACTLY_ONE);
-        pattern.addAll(BoundTokensTestUtil.literalsForString("IT.java"));
+        pattern.addAll(literalsForString("Start"));
+        pattern.add(EXACTLY_ONE);
+        pattern.addAll(literalsForString("IT.java"));
 
         // path: startXIT.java (sanity, should pass)
-        List<BoundToken> path = BoundTokensTestUtil.literalsForString("StartXIT.java");
+        List<BoundToken> path = literalsForString("StartXIT.java");
         assertTrue(underTest.match(pattern, path));
 
         // path: StartFooIT.java (won't match)
-        path = BoundTokensTestUtil.literalsForString("StartFooIT.java");
+        path = literalsForString("StartFooIT.java");
 
         assertFalse(underTest.match(pattern, path));
     }
 
-    @Test
-    public void testMatchLiteralFirstExactlyOne() throws Exception {
-        // pattern:  "Start?IT.java"
-
-        List<BoundToken> pattern = new ArrayList<>();
-        pattern.addAll(BoundTokensTestUtil.literalsForString("Start"));
-        pattern.add(BoundTokensTestUtil.EXACTLY_ONE);
-        pattern.addAll(BoundTokensTestUtil.literalsForString("IT.java"));
-
-        // path: StartXIT.java
-        List<BoundToken> path = BoundTokensTestUtil.literalsForString("StartXIT.java");
-
-        assertTrue(underTest.match(pattern, path));
-    }
-
+    /**
+     * Attempts to match a path against a pattern containing a single matching token '*' in the middle.
+     */
     @Test
     public void testMatchLiteralFirstZeroPlus() throws Exception {
         // pattern:  "Start*IT.java"
 
         List<BoundToken> pattern = new ArrayList<>();
-        pattern.addAll(BoundTokensTestUtil.literalsForString("Start"));
-        pattern.add(BoundTokensTestUtil.ZERO_OR_MORE);
-        pattern.addAll(BoundTokensTestUtil.literalsForString("IT.java"));
+        pattern.addAll(literalsForString("Start"));
+        pattern.add(ZERO_OR_MORE);
+        pattern.addAll(literalsForString("IT.java"));
 
-        // path: StartCarIT.java
-        List<BoundToken> path = BoundTokensTestUtil.literalsForString("StartCarIT.java");
+        // path: StartCarIT.java ('*' should match 'Car')
+        List<BoundToken> path = literalsForString("StartCarIT.java");
+        assertTrue(underTest.match(pattern, path));
 
+        // path: StartIT.java ('*' should match zero characters)
+        path = literalsForString("StartIT.java");
         assertTrue(underTest.match(pattern, path));
     }
 
@@ -272,13 +324,13 @@ public void testMatchTokenFirst() throws Exception {
         // pattern:  "*File*IT.java"
 
         List<BoundToken> pattern = new ArrayList<>();
-        pattern.add(BoundTokensTestUtil.ZERO_OR_MORE);
-        pattern.addAll(BoundTokensTestUtil.literalsForString("File"));
-        pattern.add(BoundTokensTestUtil.ZERO_OR_MORE);
-        pattern.addAll(BoundTokensTestUtil.literalsForString("IT.java"));
+        pattern.add(ZERO_OR_MORE);
+        pattern.addAll(literalsForString("File"));
+        pattern.add(ZERO_OR_MORE);
+        pattern.addAll(literalsForString("IT.java"));
 
         // path: UnixFileSmallIT.java
-        List<BoundToken> path = BoundTokensTestUtil.literalsForString("UnixFileSmallIT.java");
+        List<BoundToken> path = literalsForString("UnixFileSmallIT.java");
 
         assertTrue(underTest.match(pattern, path));
     }
@@ -287,17 +339,53 @@ public void testMatchTokenFirst() throws Exception {
     public void testMatchConsecutiveMatchTokens() throws Exception {
         // pattern: "Foo??.java"
         List<BoundToken> pattern = new ArrayList<>();
-        pattern.addAll(BoundTokensTestUtil.literalsForString("Foo"));
-        pattern.add(BoundTokensTestUtil.EXACTLY_ONE);
-        pattern.add(BoundTokensTestUtil.EXACTLY_ONE);
-        pattern.addAll(BoundTokensTestUtil.literalsForString(".java"));
+        pattern.addAll(literalsForString("Foo"));
+        pattern.add(EXACTLY_ONE);
+        pattern.add(EXACTLY_ONE);
+        pattern.addAll(literalsForString(".java"));
 
         // path: FooIT.java
-        List<BoundToken> path = BoundTokensTestUtil.literalsForString("FooIT.java");
+        List<BoundToken> path = literalsForString("FooIT.java");
 
         assertTrue(underTest.match(pattern, path));
     }
 
+    /**
+     * Attempts to match a directory against the directory match token '**'.
+     */
+    @Test
+    public void testMatchZeroPlusAndLiteral() throws Exception {
+        // pattern: "**"
+        List<BoundToken> pattern = DIR_L;
+
+        // path: "src"
+        List<BoundToken> path = literalsForString("src");
+
+        assertTrue(underTest.match(pattern, path));
+    }
+
+    /**
+     * Ensures that a pattern like 'Foo**IT.java' - while almost certainly a mistake by the person who created the
+     * pattern - is a valid pattern. Make sure it matches.
+     */
+    @Test
+    public void testMatchMultipleZeroPlusTokens() throws Exception {
+        // pattern: "Foo**IT.java"
+        List<BoundToken> pattern = new ArrayList<>();
+        pattern.addAll(literalsForString("Foo"));
+        pattern.add(ZERO_OR_MORE);
+        pattern.add(ZERO_OR_MORE);
+        pattern.addAll(literalsForString("IT.java"));
+
+        // path: "FooIT.java" should match - '**' matches zero characters
+        List<BoundToken> path = literalsForString("FooIT.java");
+        assertTrue(underTest.match(pattern, path));
+
+        // path: "FooBarBazIT.java" should match - '**' matches "BarBaz"
+        path = literalsForString("FooBarBazIT.java");
+        assertTrue(underTest.match(pattern, path));
+    }
+
     @Test
     public void testFindNextToken() throws Exception {
         assertEquals(19, underTest.findNextToken("src/test/resources/*IT.java", 0));
@@ -376,6 +464,24 @@ public void testRightAnchorBar() throws Exception {
         assertEquals(8, underTest.findRightAnchor(pattern, path, 4, 1));
     }
 
+    /**
+     * Ensures that both representations of the match token '**' - a single BoundToken containing a DIRECTORY, or two
+     * consecutive BoundTokens containing a ZERO_OR_MORE_CHARACTERS - are considered a "directory match" token
+     * by the ExpressionMatcher.
+     *
+     * @throws Exception
+     */
+    @Test
+    public void testIsDirectoryMatch() throws Exception {
+        List<BoundToken> directory = Arrays.asList(new BoundToken(Token.DIRECTORY, Token.DIRECTORY.getTokenString()));
+        List<BoundToken> consecutiveZeroOrMore = Arrays.asList(
+                new BoundToken(Token.ZERO_OR_MORE_CHARACTERS, Token.ZERO_OR_MORE_CHARACTERS.getTokenString()),
+                new BoundToken(Token.ZERO_OR_MORE_CHARACTERS, Token.ZERO_OR_MORE_CHARACTERS.getTokenString()));
+
+        assertTrue(underTest.isDirectoryMatchToken(directory));
+        assertTrue(underTest.isDirectoryMatchToken(consecutiveZeroOrMore));
+    }
+
     void assertListsEqual(List<String> expected, List<String> actual) {
         assertExpectedCount(expected.size(), actual);