
module TraverseHTML (
    hasAttribValue,
    isTagOpenNameId,
    branchHasName,
    branchHasAttrValue,
    branchHasId,
    branchHasClass,
    branchAttr,
    allChildren,
    children,
    text,
    (<&>))
where

import Data.Maybe (maybeToList)
import Text.HTML.TagSoup
import Text.HTML.TagSoup.Tree

hasAttribValue k v (TagOpen _ as) = lookup k as == Just v
hasAttribValue _ _ _ = False

isTagOpenNameId name id' t =
    isTagOpenName name t && hasAttribValue "id" id' t

branchHasName name (TagBranch name' _ _) = name == name'
branchHasName _ _ = False

branchHasAttrValue name value (TagBranch _ as _) =
    lookup name as == Just value
branchHasAttrValue _ _ _ = False

branchHasId = branchHasAttrValue "id"

branchHasClass = branchHasAttrValue "class"

branchAttr name (TagBranch _ as _) = maybeToList $ lookup name as
branchAttr _ _ = []

allChildren (TagBranch _ _ xs) = xs
allChildren _ = []

children :: (TagTree String -> Bool) -> TagTree String -> [TagTree String]
children test (TagBranch _ _ xs) = filter test xs
children _ _ = []

strip = stripLeft . reverse . stripLeft . reverse
    where stripLeft = dropWhile (`elem` "\n\t ")

text = concatMap childText . allChildren
    where childText (TagLeaf (TagText s)) =
              let s' = strip s in
                  case s' of
                      "" -> []
                      _ -> [s']
          childText _ = []

a <&> b = \t -> a t && b t

