Examples | By Level | By Topic | Learning Paths
741 Fundamental

741-parse-dont-validate — Parse Don't Validate

Functional Programming

Tutorial

The Problem

"Validate then use" is the traditional approach: accept raw input, check it, and then use the raw value downstream, relying on programmers to remember to validate first. Parse-don't-validate flips this: you can only construct a typed value by successfully parsing it, making invalid states unrepresentable. Coined by Alexis King in 2019, this principle is used in Haskell's text library, Rust's std::net::IpAddr, and almost every well-designed API that accepts structured input.

🎯 Learning Outcomes

  • Create types with private fields that can only be constructed via a parsing function
  • Model a NonEmptyString, Email, and BoundedInt that are always valid once constructed
  • Return Result<ValidType, ParseError> instead of Result<String, Error> from parse functions
  • Understand why private fields are essential — they prevent bypassing validation
  • See how composed types (UserProfile) inherit validity from their components
  • Code Example

    #![allow(clippy::all)]
    //! 741: Parse-Don't-Validate
    //! Types that can ONLY be created via parsing. Once created, always valid.
    
    // ── Error types ────────────────────────────────────────────────────────────────
    
    /// Reasons a raw input can fail to parse into one of the validated types.
    ///
    /// Implements [`std::error::Error`], so a value can be propagated with `?`
    /// into `Box<dyn std::error::Error>` or wrapped by another error type.
    #[derive(Debug, Clone, PartialEq, Eq)]
    pub enum ParseError {
        /// The input string had zero length.
        EmptyString,
        /// The input (carried verbatim) is not an acceptable email address.
        InvalidEmail(String),
        /// `value` lies outside the inclusive range `lo..=hi`.
        OutOfRange { value: i64, lo: i64, hi: i64 },
        /// The input contained the given invalid character.
        InvalidChar(char),
    }

    impl std::fmt::Display for ParseError {
        /// Writes a short, human-readable description of the failure.
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            match self {
                Self::EmptyString => f.write_str("string is empty"),
                Self::InvalidEmail(raw) => write!(f, "'{}' is not a valid email", raw),
                Self::OutOfRange { value, lo, hi } => {
                    write!(f, "{} not in range [{}, {}]", value, lo, hi)
                }
                Self::InvalidChar(ch) => write!(f, "invalid character '{}'", ch),
            }
        }
    }

    // `Debug` + `Display` are all the trait requires; no underlying source error exists.
    impl std::error::Error for ParseError {}
    
    // ── NonEmptyString ────────────────────────────────────────────────────────────
    
    /// A string guaranteed to be non-empty.
    ///
    /// The inner field is private, so code outside the defining module can only
    /// obtain a value through [`NonEmptyString::parse`].
    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
    pub struct NonEmptyString(String);

    impl NonEmptyString {
        /// Copies `s` into a new `NonEmptyString`.
        ///
        /// Only the zero-length string is rejected; whitespace-only input is accepted.
        ///
        /// # Errors
        ///
        /// Returns [`ParseError::EmptyString`] when `s` is empty.
        pub fn parse(s: &str) -> Result<Self, ParseError> {
            if s.is_empty() {
                return Err(ParseError::EmptyString);
            }
            Ok(Self(s.to_owned()))
        }

        /// Borrows the validated text.
        pub fn as_str(&self) -> &str {
            &self.0
        }

        /// Length of the text in bytes (not characters).
        pub fn len(&self) -> usize {
            self.0.len()
        }
    }

    impl std::fmt::Display for NonEmptyString {
        /// Writes the text, honoring the formatter's width, fill, alignment and
        /// precision (e.g. `{:>10}`), exactly as a plain `&str` would.
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            // `pad` applies the caller's format spec; `write_str` would ignore it.
            f.pad(&self.0)
        }
    }
    
    // ── Email ─────────────────────────────────────────────────────────────────────
    
    /// A validated, ASCII-lowercased email address.
    ///
    /// The inner field is private, so code outside the defining module can only
    /// obtain a value through [`Email::parse`].
    #[derive(Debug, Clone, PartialEq, Eq)]
    pub struct Email(String);

    impl Email {
        /// Parses `s` into an `Email`, storing it ASCII-lowercased.
        ///
        /// Accepted input has exactly one `@`, a non-empty local part, and a domain
        /// that contains at least one `.` and no empty labels (no leading, trailing
        /// or doubled dots). This is a small structural check, not full RFC
        /// validation.
        ///
        /// # Errors
        ///
        /// Returns [`ParseError::InvalidEmail`] carrying the original input when
        /// any of the rules above is violated.
        pub fn parse(s: &str) -> Result<Self, ParseError> {
            let invalid = || ParseError::InvalidEmail(s.to_owned());
            let (local, domain) = s.split_once('@').ok_or_else(invalid)?;
            // A second '@' would make `local_part()` / `domain()` disagree with
            // what was validated here, so it is rejected outright.
            let well_formed = !local.is_empty()
                && !domain.contains('@')
                && domain.contains('.')
                && domain.split('.').all(|label| !label.is_empty());
            if !well_formed {
                return Err(invalid());
            }
            Ok(Email(s.to_ascii_lowercase()))
        }

        /// Borrows the full, lowercased address.
        pub fn as_str(&self) -> &str {
            &self.0
        }

        /// The part before the `@`.
        pub fn local_part(&self) -> &str {
            self.parts().0
        }

        /// The part after the `@`.
        pub fn domain(&self) -> &str {
            self.parts().1
        }

        /// Splits the stored address at its single `@`.
        fn parts(&self) -> (&str, &str) {
            self.0
                .split_once('@')
                .expect("Email invariant: parse guarantees exactly one '@'")
        }
    }

    impl std::fmt::Display for Email {
        /// Writes the address, honoring the formatter's width, fill, alignment and
        /// precision, exactly as a plain `&str` would.
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            // `pad` applies the caller's format spec; `write_str` would ignore it.
            f.pad(&self.0)
        }
    }
    
    // ── BoundedInt ────────────────────────────────────────────────────────────────
    
    /// An `i64` known to lie in the inclusive range `LO..=HI`.
    ///
    /// The bounds are const generic parameters, so two differently bounded
    /// integers are distinct types.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
    pub struct BoundedInt<const LO: i64, const HI: i64>(i64);

    impl<const LO: i64, const HI: i64> BoundedInt<LO, HI> {
        /// Wraps `n` when it lies within `LO..=HI`.
        ///
        /// # Errors
        ///
        /// Returns [`ParseError::OutOfRange`] reporting `n` and both bounds when
        /// `n` is below `LO` or above `HI`.
        pub fn parse(n: i64) -> Result<Self, ParseError> {
            if (LO..=HI).contains(&n) {
                Ok(Self(n))
            } else {
                Err(ParseError::OutOfRange {
                    value: n,
                    lo: LO,
                    hi: HI,
                })
            }
        }

        /// Returns the wrapped integer.
        pub fn value(self) -> i64 {
            let Self(inner) = self;
            inner
        }
    }
    
    // ── Functions that REQUIRE parsed types ───────────────────────────────────────
    
    /// Builds the confirmation text for a welcome email.
    ///
    /// The parameter is an already-parsed `Email`, so this function performs no
    /// validation of its own.
    fn send_welcome(email: &Email) -> String {
        let mut message = String::from("Welcome email sent to ");
        message.push_str(&email.to_string());
        message
    }
    
    /// Builds the confirmation text for a newly created account.
    ///
    /// Both parameters are already-parsed types, so no `is_empty()` or email
    /// format guards are needed here.
    fn create_account(username: &NonEmptyString, email: &Email) -> String {
        let mut message = String::from("Account '");
        message.push_str(&username.to_string());
        message.push_str("' created with email ");
        message.push_str(&email.to_string());
        message
    }
    
    #[cfg(test)]
    mod tests {
        use super::*;

        /// Inclusive 0..=10 range shared by the `BoundedInt` tests.
        type Score = BoundedInt<0, 10>;

        /// A well-formed address exposes its domain and local part.
        #[test]
        fn valid_email_parses() {
            let email = Email::parse("user@example.com").unwrap();
            assert_eq!(
                (email.domain(), email.local_part()),
                ("example.com", "user")
            );
        }

        /// Upper-case input is stored lower-cased.
        #[test]
        fn email_normalized_to_lowercase() {
            let email = Email::parse("USER@EXAMPLE.COM").unwrap();
            assert_eq!(email.as_str(), "user@example.com");
        }

        /// Empty input, a missing `@`, a missing local part and a dot-less domain all fail.
        #[test]
        fn invalid_emails_rejected() {
            let rejected = ["", "noatsign", "@nodomain", "user@nodot"];
            for raw in rejected.iter().copied() {
                assert!(Email::parse(raw).is_err());
            }
        }

        /// A non-empty input parses and reports its byte length.
        #[test]
        fn non_empty_string_valid() {
            let parsed = NonEmptyString::parse("hello").unwrap();
            assert_eq!(parsed.len(), 5);
        }

        /// The empty string yields `ParseError::EmptyString`.
        #[test]
        fn non_empty_string_rejects_empty() {
            let outcome = NonEmptyString::parse("");
            assert_eq!(outcome, Err(ParseError::EmptyString));
        }

        /// An interior value and both inclusive bounds are accepted unchanged.
        #[test]
        fn bounded_int_valid() {
            for n in [5_i64, 0, 10].iter().copied() {
                assert_eq!(Score::parse(n).unwrap().value(), n);
            }
        }

        /// Values just outside either bound are rejected.
        #[test]
        fn bounded_int_out_of_range() {
            for n in [-1_i64, 11].iter().copied() {
                assert!(Score::parse(n).is_err());
            }
        }
    }

    Key Differences

  • Mechanism: Rust uses private struct fields, which cannot be accessed or constructed outside the defining module; OCaml uses abstract module types to hide the representation.
  • Const generics: Rust's BoundedInt<LO, HI> encodes bounds in the type itself; OCaml requires runtime bounds stored in the value or a functor argument.
  • Error accumulation: Rust's ? operator short-circuits on first error; OCaml's Applicative validation pattern accumulates all errors before returning.
  • Ecosystem: Rust's garde, validator, and nutype crates generate parse-don't-validate types from derive macros; OCaml has ppx_validate.
  • OCaml Approach

    OCaml uses abstract types in modules to enforce the same invariant. A module Email : sig type t val parse : string -> (t, error) result val to_string : t -> string end ensures only parse can create an Email.t. Jane Street's Validated module and Validated_sexp follow this exact pattern. OCaml's module system makes it natural — the implementation type is hidden behind the signature.

    Full Source

    #![allow(clippy::all)]
    //! 741: Parse-Don't-Validate
    //! Types that can ONLY be created via parsing. Once created, always valid.
    
    // ── Error types ────────────────────────────────────────────────────────────────
    
    /// Reasons a raw input can fail to parse into one of the validated types.
    ///
    /// Implements [`std::error::Error`], so a value can be propagated with `?`
    /// into `Box<dyn std::error::Error>` or wrapped by another error type.
    #[derive(Debug, Clone, PartialEq, Eq)]
    pub enum ParseError {
        /// The input string had zero length.
        EmptyString,
        /// The input (carried verbatim) is not an acceptable email address.
        InvalidEmail(String),
        /// `value` lies outside the inclusive range `lo..=hi`.
        OutOfRange { value: i64, lo: i64, hi: i64 },
        /// The input contained the given invalid character.
        InvalidChar(char),
    }

    impl std::fmt::Display for ParseError {
        /// Writes a short, human-readable description of the failure.
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            match self {
                Self::EmptyString => f.write_str("string is empty"),
                Self::InvalidEmail(raw) => write!(f, "'{}' is not a valid email", raw),
                Self::OutOfRange { value, lo, hi } => {
                    write!(f, "{} not in range [{}, {}]", value, lo, hi)
                }
                Self::InvalidChar(ch) => write!(f, "invalid character '{}'", ch),
            }
        }
    }

    // `Debug` + `Display` are all the trait requires; no underlying source error exists.
    impl std::error::Error for ParseError {}
    
    // ── NonEmptyString ────────────────────────────────────────────────────────────
    
    /// A string guaranteed to be non-empty.
    ///
    /// The inner field is private, so code outside the defining module can only
    /// obtain a value through [`NonEmptyString::parse`].
    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
    pub struct NonEmptyString(String);

    impl NonEmptyString {
        /// Copies `s` into a new `NonEmptyString`.
        ///
        /// Only the zero-length string is rejected; whitespace-only input is accepted.
        ///
        /// # Errors
        ///
        /// Returns [`ParseError::EmptyString`] when `s` is empty.
        pub fn parse(s: &str) -> Result<Self, ParseError> {
            if s.is_empty() {
                return Err(ParseError::EmptyString);
            }
            Ok(Self(s.to_owned()))
        }

        /// Borrows the validated text.
        pub fn as_str(&self) -> &str {
            &self.0
        }

        /// Length of the text in bytes (not characters).
        pub fn len(&self) -> usize {
            self.0.len()
        }
    }

    impl std::fmt::Display for NonEmptyString {
        /// Writes the text, honoring the formatter's width, fill, alignment and
        /// precision (e.g. `{:>10}`), exactly as a plain `&str` would.
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            // `pad` applies the caller's format spec; `write_str` would ignore it.
            f.pad(&self.0)
        }
    }
    
    // ── Email ─────────────────────────────────────────────────────────────────────
    
    /// A validated, ASCII-lowercased email address.
    ///
    /// The inner field is private, so code outside the defining module can only
    /// obtain a value through [`Email::parse`].
    #[derive(Debug, Clone, PartialEq, Eq)]
    pub struct Email(String);

    impl Email {
        /// Parses `s` into an `Email`, storing it ASCII-lowercased.
        ///
        /// Accepted input has exactly one `@`, a non-empty local part, and a domain
        /// that contains at least one `.` and no empty labels (no leading, trailing
        /// or doubled dots). This is a small structural check, not full RFC
        /// validation.
        ///
        /// # Errors
        ///
        /// Returns [`ParseError::InvalidEmail`] carrying the original input when
        /// any of the rules above is violated.
        pub fn parse(s: &str) -> Result<Self, ParseError> {
            let invalid = || ParseError::InvalidEmail(s.to_owned());
            let (local, domain) = s.split_once('@').ok_or_else(invalid)?;
            // A second '@' would make `local_part()` / `domain()` disagree with
            // what was validated here, so it is rejected outright.
            let well_formed = !local.is_empty()
                && !domain.contains('@')
                && domain.contains('.')
                && domain.split('.').all(|label| !label.is_empty());
            if !well_formed {
                return Err(invalid());
            }
            Ok(Email(s.to_ascii_lowercase()))
        }

        /// Borrows the full, lowercased address.
        pub fn as_str(&self) -> &str {
            &self.0
        }

        /// The part before the `@`.
        pub fn local_part(&self) -> &str {
            self.parts().0
        }

        /// The part after the `@`.
        pub fn domain(&self) -> &str {
            self.parts().1
        }

        /// Splits the stored address at its single `@`.
        fn parts(&self) -> (&str, &str) {
            self.0
                .split_once('@')
                .expect("Email invariant: parse guarantees exactly one '@'")
        }
    }

    impl std::fmt::Display for Email {
        /// Writes the address, honoring the formatter's width, fill, alignment and
        /// precision, exactly as a plain `&str` would.
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            // `pad` applies the caller's format spec; `write_str` would ignore it.
            f.pad(&self.0)
        }
    }
    
    // ── BoundedInt ────────────────────────────────────────────────────────────────
    
    /// An `i64` known to lie in the inclusive range `LO..=HI`.
    ///
    /// The bounds are const generic parameters, so two differently bounded
    /// integers are distinct types.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
    pub struct BoundedInt<const LO: i64, const HI: i64>(i64);

    impl<const LO: i64, const HI: i64> BoundedInt<LO, HI> {
        /// Wraps `n` when it lies within `LO..=HI`.
        ///
        /// # Errors
        ///
        /// Returns [`ParseError::OutOfRange`] reporting `n` and both bounds when
        /// `n` is below `LO` or above `HI`.
        pub fn parse(n: i64) -> Result<Self, ParseError> {
            if (LO..=HI).contains(&n) {
                Ok(Self(n))
            } else {
                Err(ParseError::OutOfRange {
                    value: n,
                    lo: LO,
                    hi: HI,
                })
            }
        }

        /// Returns the wrapped integer.
        pub fn value(self) -> i64 {
            let Self(inner) = self;
            inner
        }
    }
    
    // ── Functions that REQUIRE parsed types ───────────────────────────────────────
    
    /// Builds the confirmation text for a welcome email.
    ///
    /// The parameter is an already-parsed `Email`, so this function performs no
    /// validation of its own.
    fn send_welcome(email: &Email) -> String {
        let mut message = String::from("Welcome email sent to ");
        message.push_str(&email.to_string());
        message
    }
    
    /// Builds the confirmation text for a newly created account.
    ///
    /// Both parameters are already-parsed types, so no `is_empty()` or email
    /// format guards are needed here.
    fn create_account(username: &NonEmptyString, email: &Email) -> String {
        let mut message = String::from("Account '");
        message.push_str(&username.to_string());
        message.push_str("' created with email ");
        message.push_str(&email.to_string());
        message
    }
    
    #[cfg(test)]
    mod tests {
        use super::*;

        /// Inclusive 0..=10 range shared by the `BoundedInt` tests.
        type Score = BoundedInt<0, 10>;

        /// A well-formed address exposes its domain and local part.
        #[test]
        fn valid_email_parses() {
            let email = Email::parse("user@example.com").unwrap();
            assert_eq!(
                (email.domain(), email.local_part()),
                ("example.com", "user")
            );
        }

        /// Upper-case input is stored lower-cased.
        #[test]
        fn email_normalized_to_lowercase() {
            let email = Email::parse("USER@EXAMPLE.COM").unwrap();
            assert_eq!(email.as_str(), "user@example.com");
        }

        /// Empty input, a missing `@`, a missing local part and a dot-less domain all fail.
        #[test]
        fn invalid_emails_rejected() {
            let rejected = ["", "noatsign", "@nodomain", "user@nodot"];
            for raw in rejected.iter().copied() {
                assert!(Email::parse(raw).is_err());
            }
        }

        /// A non-empty input parses and reports its byte length.
        #[test]
        fn non_empty_string_valid() {
            let parsed = NonEmptyString::parse("hello").unwrap();
            assert_eq!(parsed.len(), 5);
        }

        /// The empty string yields `ParseError::EmptyString`.
        #[test]
        fn non_empty_string_rejects_empty() {
            let outcome = NonEmptyString::parse("");
            assert_eq!(outcome, Err(ParseError::EmptyString));
        }

        /// An interior value and both inclusive bounds are accepted unchanged.
        #[test]
        fn bounded_int_valid() {
            for n in [5_i64, 0, 10].iter().copied() {
                assert_eq!(Score::parse(n).unwrap().value(), n);
            }
        }

        /// Values just outside either bound are rejected.
        #[test]
        fn bounded_int_out_of_range() {
            for n in [-1_i64, 11].iter().copied() {
                assert!(Score::parse(n).is_err());
            }
        }
    }
    ✓ Tests Rust test suite
    #[cfg(test)]
    mod tests {
        use super::*;

        /// Inclusive 0..=10 range shared by the `BoundedInt` tests.
        type Score = BoundedInt<0, 10>;

        /// A well-formed address exposes its domain and local part.
        #[test]
        fn valid_email_parses() {
            let email = Email::parse("user@example.com").unwrap();
            assert_eq!(
                (email.domain(), email.local_part()),
                ("example.com", "user")
            );
        }

        /// Upper-case input is stored lower-cased.
        #[test]
        fn email_normalized_to_lowercase() {
            let email = Email::parse("USER@EXAMPLE.COM").unwrap();
            assert_eq!(email.as_str(), "user@example.com");
        }

        /// Empty input, a missing `@`, a missing local part and a dot-less domain all fail.
        #[test]
        fn invalid_emails_rejected() {
            let rejected = ["", "noatsign", "@nodomain", "user@nodot"];
            for raw in rejected.iter().copied() {
                assert!(Email::parse(raw).is_err());
            }
        }

        /// A non-empty input parses and reports its byte length.
        #[test]
        fn non_empty_string_valid() {
            let parsed = NonEmptyString::parse("hello").unwrap();
            assert_eq!(parsed.len(), 5);
        }

        /// The empty string yields `ParseError::EmptyString`.
        #[test]
        fn non_empty_string_rejects_empty() {
            let outcome = NonEmptyString::parse("");
            assert_eq!(outcome, Err(ParseError::EmptyString));
        }

        /// An interior value and both inclusive bounds are accepted unchanged.
        #[test]
        fn bounded_int_valid() {
            for n in [5_i64, 0, 10].iter().copied() {
                assert_eq!(Score::parse(n).unwrap().value(), n);
            }
        }

        /// Values just outside either bound are rejected.
        #[test]
        fn bounded_int_out_of_range() {
            for n in [-1_i64, 11].iter().copied() {
                assert!(Score::parse(n).is_err());
            }
        }
    }

    Exercises

  • Implement a PhoneNumber type that only accepts E.164 format (+ followed by 7–15 digits) via a parse function returning Result<PhoneNumber, ParseError>.
  • Create a Url newtype that validates scheme, host, and optional port, exposing typed accessors for each component.
  • Write a UserProfile::parse(name: &str, email: &str, age: i64) -> Result<UserProfile, Vec<ParseError>> that accumulates all validation errors instead of returning on the first failure.
  • Open Source Repos