git.fiddlerwoaroof.com
src/CssParser.hs
eccab7bf
 {-
  -Copyright (c) 2015 Edward Langley
  -All rights reserved.
  -
  -Redistribution and use in source and binary forms, with or without
  -modification, are permitted provided that the following conditions
  -are met:
  -
  -Redistributions of source code must retain the above copyright notice,
  -this list of conditions and the following disclaimer.
  -
  -Redistributions in binary form must reproduce the above copyright
  -notice, this list of conditions and the following disclaimer in the
  -documentation and/or other materials provided with the distribution.
  -
  -Neither the name of the project's author nor the names of its
  -contributors may be used to endorse or promote products derived from
  -this software without specific prior written permission.
  -
  -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  -FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
  -TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  -}
 
 {-# LANGUAGE RankNTypes,KindSignatures #-}
 
 module CssParser
   where
 
54824921
 import Prelude hiding (init,fst,length)
eccab7bf
 import Control.Monad
 import qualified Text.Parsec as NP
 import Text.ParserCombinators.Parsec
 import Data.Functor.Identity()
 import Text.Parsec.Prim()
 import Data.Map hiding (foldr,null,map)
 import Data.Maybe
 
 data URL = URLFunc String |
            URLString String |
            URLMedia [String] URL
   deriving (Show, Eq)
 
 
 type Query = [String]
 data CSSDeclaration = CSSDeclaration CSSSelector CSSBody |
                       PageDeclaration (Maybe String) CSSBody |
                       Charset String |
                       Import URL |
                       MediaQuery Query CSSDeclaration |
                       DeclarationList [CSSDeclaration]
   deriving (Show,Eq)
 
 
 type CSSBody = Map String String
 
 data CSSSelector =
   Ident String |
   TagName String |
   Pseudo String |
   Attribute String String |
   Id String |
   Class String |
   Sibling CSSSelector CSSSelector | -- These are all XRelation Head Tail
   DirectChild CSSSelector CSSSelector |
   Child CSSSelector CSSSelector |
   SelectorList [CSSSelector]
   deriving (Show,Eq)
 
 matchUpToX :: (Eq a,Num a) => GenParser Char st Char -> a -> GenParser Char st String
54824921
 matchUpToX p = scan id
eccab7bf
   where
     scan f x = if x == 0
       then return (f [])
       else do {
         nxt <- p;
         scan (\tail_ -> f (nxt:tail_)) (x-1)
       } <|> return (f [])
 
 match1ToX :: (Eq a,Num a) => GenParser Char st Char -> a -> GenParser Char st String
 match1ToX p n = do
   result <- p
   tail_ <- matchUpToX p (n-1)
   return (result:tail_)
 
 -- Char classes
 h :: GenParser Char st Char
 h = hexDigit
 
 nonascii  :: GenParser Char st Char
 nonascii  = satisfy isNonAscii
   where
     isNonAscii a = '\240' <= a && a <= '\1114111'
 
 unicode :: GenParser Char st String
 unicode = do
54824921
   _ <- char '\\'
eccab7bf
   result <- match1ToX h (6 :: Integer)
   spaces
   return result
 
 escape :: GenParser Char st String
54824921
 escape = liftM ('\\':) (try unicode) <|> char '\\' <:> many1 (noneOf "\r\n\f0123456789abcdef")
eccab7bf
 
 nmstart :: GenParser Char st Char
 nmstart = oneOf "-" <|> letter
 
 nmchar :: GenParser Char st Char
 nmchar = oneOf "_-" <|> alphaNum
 
 
 space_ :: GenParser Char st Char
 space_ = oneOf " \t\r\n\f"
 
 s :: GenParser Char st String
 s = many1 space_
 
 w :: GenParser Char st String
 w = many space_
 
 nl :: GenParser Char st String
54824921
 nl = (string "\n" <|> try (string "\r\n") <|> string "\r" <|> string "\f") >> return "\n"
eccab7bf
 
 genBadString :: Char -> GenParser Char st String
 genBadString c = do
54824921
   _ <- char c
   result <- liftM concat $
     many $ many1 (noneOf (c:"\n\r\f")) <|> (char '\\' >> nl) <|> escape
   _ <- char '\\'
   _ <- anyChar
eccab7bf
   return result
 
 badstring :: GenParser Char st String
 badstring = genBadString '"' <|> genBadString '\''
 
 string_ :: GenParser Char st String
 string_ = do
   a <- char '"' <|> char '\''
54824921
   result <- liftM concat $ many $
     try (char '\\' <:> nl) <|> try escape <|> liftM (:[]) (noneOf (a:"\n\r\f"))
   _ <- char a
eccab7bf
   return result
 
 -- badcomment, baduri, comment undefined
 
 -- tested
 ident :: GenParser Char st String
 ident = do
   head_ <- optionMaybe $ string "-"
   result <- nmstart
   tail_ <- many nmchar
54824921
   return $ fromMaybe "" head_ ++ (result:tail_)
eccab7bf
 
 --tested
 name :: GenParser Char st String
 name = many1 nmchar
 
 -- tested
 num :: GenParser Char st String
 num = floating <|> many1 digit
   where
     floating = try $ do
       radix <- many digit
       mantissa <- char '.' <:> many1 digit
       return $ radix ++ mantissa
 
 cdo :: GenParser Char st String
 cdo = string "<!--"
 
 cdc :: GenParser Char st String
 cdc = string "-->"
 
 -- TODO: figger out if / should be here
 url :: GenParser Char st String
54824921
 url = liftM concat $ many $ many1 (oneOf "!#$%&*-~" <|> nonascii) <|> escape
eccab7bf
 
 atEnd :: forall b s u (m :: * -> *) t. (Show t, NP.Stream s m t) => NP.ParsecT s u m b -> NP.ParsecT s u m b
 atEnd p = p >>= ((eof >>) . return)
 
 -- partially tested
 urlParse :: GenParser Char st URL
 urlParse = do
54824921
   result <- between (string "url(" >> spaces) (char ')') urlChars
eccab7bf
   return $ URLFunc result
54824921
   where
     urlChars = withSpace $ string_ <|> many1 (oneOf "=?.,:/" <|> alphaNum) <|> url
eccab7bf
 stylesheet :: GenParser Char st CSSDeclaration
 stylesheet = do -- TODO: allow the letters to be escaped in these strings
   chrset <- charset
   let cset  = case chrset of
               Nothing -> [];
               Just x -> [x]
54824921
   _ <- many $ s <|> cdo <|> cdc
eccab7bf
 
54824921
   imports <- liftM concat $ many $ do
     imports <- many1 import_
     _ <- many ((cdo >> spaces) <|> (cdc >> spaces))
     return imports
eccab7bf
 
   result <- many $ ruleset  <|> media <|> page
54824921
   _ <- many $ (cdo >> many s) <|> (cdc >> many s)
eccab7bf
 
   return $ DeclarationList $ cset ++ imports ++ result
 
 page :: GenParser Char st CSSDeclaration
 page = do
54824921
   _ <- string "@page"
eccab7bf
   spaces
54824921
   pseudo_ <- optionMaybe pseudoPage
eccab7bf
 
   body <- rulebody
   return $ PageDeclaration pseudo_ $ fromList body
 
54824921
 pseudoPage :: GenParser Char st String
 pseudoPage = char ':' >> ident >>= (spaces >>) . return 
eccab7bf
 
 
 charset :: GenParser Char st (Maybe CSSDeclaration)
 charset = optionMaybe $ do
54824921
   _ <- try $ string "@charset "
eccab7bf
   result <- string_
54824921
   _ <- char ';'
eccab7bf
   return $ Charset result
 
 medium :: GenParser Char st String
 medium = ident >>= (\q -> spaces >> return q)
 
 mediaList :: GenParser Char st [String]
 mediaList = sepBy medium (char ',' >> spaces)
 
 media :: GenParser Char st CSSDeclaration
 media =  do
54824921
   _ <- try $ string "@media"
eccab7bf
   spaces
   query <- mediaList
   body <- between (char '{') (char '}') $ do
     spaces
     rules <- many ruleset
     spaces
     return rules
   return $ MediaQuery query $ DeclarationList body
 
 import_ :: GenParser Char st CSSDeclaration
 import_ = do
54824921
   _ <- try $ string "@import "
eccab7bf
   spaces
54824921
   result <- liftM URLString string_ <|> urlParse
eccab7bf
   spaces
54824921
   tail_ <- optionMaybe mediaList
   _ <- char ';'
eccab7bf
   spaces
   return $ Import $ case tail_ of
     Nothing -> result
     Just x -> URLMedia x result
 
 
 
 cssRule :: GenParser Char st CSSSelector
54824921
 cssRule = liftM SelectorList (sepBy1 selector $ char ',' >> spaces)
eccab7bf
 
 selector :: GenParser Char st CSSSelector
 selector = do
   result <- simpleSelector
   tail_ <- optionMaybe $ parseCombinator result <|> try (parseOCombinator result)
   spaces
   return $ fromMaybe result tail_
 
 -- This returns the value of a combinator, ignoring trailing spaces
 withSpace :: GenParser Char u b -> GenParser Char u b
 withSpace = (>>= ((spaces >>) . return))
 
 oneOfWithSpace :: String -> GenParser Char st Char
 oneOfWithSpace ss = foldr1 (<|>) parsers
   where
     parsers = map (withSpace . char) ss
 
 operator :: GenParser Char st Char
 operator = oneOfWithSpace "/,"
 
 combinator :: GenParser Char st Char
 combinator = oneOfWithSpace "+>"
 
54824921
 unaryOperator :: GenParser Char st Char
 unaryOperator = oneOf "-+"
eccab7bf
 
 -- parse an optional combinator
 parseOCombinator :: CSSSelector -> GenParser Char st CSSSelector
 parseOCombinator head_ = do
   spaces
54824921
   combine <- optionMaybe combinator
eccab7bf
   sel <- selector
   return $ case combine of
     Nothing -> Child head_ sel
     (Just x) -> case x of
       '+' -> Sibling head_ sel
       '>' -> DirectChild head_ sel
54824921
       _ -> undefined
eccab7bf
 
 -- parse a combinator
 parseCombinator :: CSSSelector -> GenParser Char st CSSSelector
 parseCombinator head_ = do
   combine <- combinator
   sel <- selector
   return $ case combine of
     '+' -> Sibling head_ sel
     '>' -> DirectChild head_ sel
54824921
     _ -> undefined
eccab7bf
 
 simpleSelector :: GenParser Char st CSSSelector
 simpleSelector = do
   result <- (try elementName <:> many modifier) <|> many1 modifier
   return $ if null $ tail result
     then head result
     else SelectorList result
 
 modifier :: GenParser Char st CSSSelector
 modifier = hash <|> class_ <|> attrib <|> pseudo
 
 elementName :: GenParser Char st CSSSelector
 elementName = liftM TagName $ ident <|> string "*"
 
 hash :: GenParser Char st CSSSelector
 hash = char '#' >> liftM Id ident
 
 class_ :: GenParser Char st CSSSelector
 class_ = char '.' >> liftM Class ident
 
 attrib :: GenParser Char st CSSSelector
 attrib = between (char '[') (char ']') $ do
   spaces
   key <- ident
   spaces
54824921
   value <- char '=' >> ident
eccab7bf
   spaces
   return $ Attribute key value
 
 pseudo :: GenParser Char st CSSSelector
 pseudo = char ':' >> liftM Pseudo selectorIdent
   where
     selectorIdent = do
       init <- optionMaybe $ string ":"
       idnt <- ident
54824921
       return $ fromMaybe "" init ++ idnt
eccab7bf
 
 
 {-# ANN (<++>) "HLint: ignore" #-}
 (<++>) :: Monad m => m [a] -> m [a] -> m [a]
 a <++> b = liftM2 (++) a b
 
 (<:>) :: Monad m => m a -> m [a] -> m [a]
 a <:> b = liftM2 (:) a b
 
 
 ruleset :: GenParser Char st CSSDeclaration
 ruleset = do
   selector_ <- cssRule
   spaces
   result <- rulebody
   return $ CSSDeclaration selector_ $ fromList result
   
 rulebody :: GenParser Char st [(String, String)]
 rulebody = between (withSpace $ char '{') (withSpace $ char '}') $ do
     head_ <- declaration
     tail_ <- many $ do
54824921
       char ';' >> spaces
eccab7bf
       optionMaybe declaration
54824921
     _ <- spaces
     return (head_:catMaybes tail_)
eccab7bf
 
 declaration :: GenParser Char st (String,String)
 declaration = do
   prop <- property
54824921
   value <- withSpace (char ':') >> expr
eccab7bf
   optional prio
   return (prop,value)
 
 prio :: GenParser Char st String
 prio = char '!' >> spaces >> string "important"
 
 -- TODO: Still need to implement comments . . . 
 
 expr :: GenParser Char st String
 expr = do
   fst <- term
   rst <- many $ do
     op <- optionMaybe $ liftM (:[]) operator
     trm <- term
     return $ case op of
       Nothing -> trm
       Just x -> x ++ (' ':trm)
   return $ fst ++ foldr (\x y -> (' ':x++(' ':y))) [] rst
 
 term :: GenParser Char st String
54824921
 term = withSpace numerical <|> withSpace uri <|> withSpace string_ <|> withSpace ident <|> hexcolor <|> function
eccab7bf
 
 numerical :: GenParser Char st String
 numerical = do
54824921
   unop <- optionMaybe unaryOperator
eccab7bf
   value <- withSpace (try percentage) <|> withSpace (try length) <|> withSpace (try ems) <|> withSpace (try exs)
            <|> withSpace (try angle) <|> withSpace (try time) <|> withSpace (try freq) <|> withSpace number 
   return $ case unop of
     Just '-' -> '-':value
     _ -> value
   where
     number  :: GenParser Char st String
     number = num
 
     percentage :: GenParser Char st String
     percentage = num <++> string "%"
 
     length :: GenParser Char st String
54824921
     length = num <++> foldr1 (<|>) (map string ["px", "cm", "mm", "in", "pt", "pc", "rem", "vw", "vh"])
eccab7bf
 
     ems  :: GenParser Char st String
     ems = num <++> string "em"
 
     exs  :: GenParser Char st String
     exs = num <++> string "ex"
 
     angle  :: GenParser Char st String
     angle = num <++> (string "deg" <|> string "rad" <|> string "grad")
 
     time  :: GenParser Char st String
     time = num <++> (string "ms" <|> string "s")
 
     freq  :: GenParser Char st String
     freq = num <++> (string "hz" <|> string "khz")
       
 
 function :: GenParser Char st String
 function = do
   _ <- ident
   _ <- string "("
   spaces
   d <- expr
   _ <- string ")"
   spaces
54824921
   return d
eccab7bf
 
 hexcolor :: GenParser Char st String
 hexcolor = char '#' <:> many1 hexDigit
   
 uri :: GenParser Char st String
 uri = do
   a <- string "url("
54824921
   result <- w >> (try string_ <|> url)
   d <- w >> string ")"
eccab7bf
   return $ a ++ result ++ d
 
 
 property :: GenParser Char st String
 property = do
   result <- ident
   spaces
   return result
 
 parseCSS :: String -> Either ParseError CSSDeclaration
 parseCSS = parse stylesheet  "(unknown)"