Saturday, April 21, 2012

Parsing with Haskell

I really LOVE to create parsers in Haskell!

Below, an example of a simple markdown parser (using the parsec library).

module Syntax (
   Syntax (..)
 , Text
 , parseSyntax
 , ParseError
   ) where

import Data.Text
import Data.String
import Text.Parsec
import Text.Parsec.Text
import Control.Monad (join)

{- Syntax Table

Italic: /.../
Bold: *...*
Language switch: $...$
Quote: {...}
Link: <...|...>
Image: [...]
Paragraph: |...|
Big: ^...^

-}

data Syntax =
   Raw Text
 | Italic Syntax
 | Bold Syntax
 | Lang Syntax
 | Quote Syntax
 | Link Text Text
 | Image Text
 | Par Syntax
 | Big Syntax
 | Seq Syntax Syntax
   deriving Show -- For debugging

reschars :: [Char]
reschars = "/*${}<>[]|^"

p_Syntax :: Parser Syntax
p_Syntax = fmap (Prelude.foldr1 Seq) $ many1 $ choice $ fmap try [
   p_Raw
 , p_Chars   Italic '/'     '/'
 , p_Chars   Bold   '*'     '*'
 , p_Chars   Lang   '$'     '$'
 , p_Chars   Quote  '{'     '}'
 , p_CharsT2 Link   '<' '|' '>'
 , p_CharsT  Image  '['     ']'
 , p_Chars   Par    '|'     '|'
 , p_Chars   Big    '^'     '^'
   ]

parseSyntax :: Text -> Either ParseError Syntax
parseSyntax = parse (withEOF p_Syntax) "SyntaxSource"

----------------------------------------------------

p_Chars :: (Syntax -> a) -> Char -> Char -> Parser a
p_Chars f c1 c2 = fmap f $ between (char c1) (char c2) $ p_Syntax

p_CharsT :: (Text -> a) -> Char -> Char -> Parser a
p_CharsT f c1 c2 = char c1 >> (fmap (f . fromString) $ many1 $ noneOf [c2])
                           >>= (char c2 >>) . return

p_CharsT2 :: (Text -> Text -> a) -> Char -> Char -> Char -> Parser a
p_CharsT2 f c1 c c2 = do
 char c1
 l <- many1 $ noneOf [c] 
 char c
 s <- many1 $ noneOf [c2]
 char c2
 return $ f (fromString l) (fromString s)

p_Raw :: Parser Syntax
p_Raw = fmap (Raw . fromString) $ many1 $
           try (char '\\' >> choice (fmap char reschars))
       <|> noneOf reschars

withEOF :: (Stream s m t, Show t) => ParsecT s u m b -> ParsecT s u m b
withEOF = (>>= (eof >>) . return)

No comments: