{-# LANGUAGE ScopedTypeVariables, ParallelListComp #-}

import Control.Applicative ((<$>))
import Control.Monad (liftM3, when)
import Data.Array.Base (UArray, amap, bounds, listArray, (//), (!))
import Data.Binary.Put (runPut)
import qualified Data.ByteString.Lazy as BS
import Data.List (groupBy, sortBy, intersperse)
import Data.Ord (comparing)
import Data.Maybe (fromMaybe)
import Text.Regex.Posix ((=~))
import Text.HTML.TagSoup (parseTags, partitions, isTagOpenName)
import Text.HTML.TagSoup.Tree (tagTree)

import AcrossLite (Crossword(Crossword), putAcrossLite)
import TraverseHTML

-- X-Word can't handle Unicode, it seems.
replaceChars = map replaceChar
    where replaceChar '—' = '-'
          replaceChar x = x

{-
Clues are represented in simple lists:

  <ol>
    <li><label id="1-across-clue" for="1-across">
      <span class="clue-number">1</span>
      Have great and unex­pect­ed success (3,3,7)</label></li>
  ...
-}

parseClueList ol = do
    li <- children (branchHasName "li") ol
    label <- children (branchHasName "label") li
    number <- children (branchHasName "span") label >>= text
    clueText <- text label
    return (read number :: Int, replaceChars clueText)

{-
Solution positions can be extracted by looking at style attributes in the grid
HTML:

  <div id="1-across" style="z-index: 1; left: 0px; top: 0px;" class="across">
-}

parsePositions cls t = do
    d <- children (branchHasName "div" <&> branchHasClass cls) t
    style <- branchAttr "style" d
    let styleAttrs = [(name, value) |
                [_, name, value] <- (style =~ "(\\S+): ([^;]+)" :: [[String]])]
        (left :: Int, top :: Int, n :: Int) =
            fromMaybe (error "can't parse grid style") $ liftM3 (,,)
                -- For left and top, extract number prefix in e.g. "0px".
                (fst <$> head <$> reads <$> lookup "left" styleAttrs)
                (fst <$> head <$> reads <$> lookup "top" styleAttrs)
                -- Clue number doubles as Z index.
                (read <$> lookup "z-index" styleAttrs)
    return (n, (left `div` 29, top `div` 29))

{-
Solutions are represented letter by letter:

  solutions["1-across-1"] = "H";
  solutions["1-across-2"] = "I";
  solutions["1-across-3"] = "T";
  ...
-}

parseSolutions script = (acrossSolutions, downSolutions)
    where
    acrossSolutions = [(n, str) | (("across", n), str) <- solutions]
    downSolutions = [(n, str) | (("down", n), str) <- solutions]

    solutions = groupFirsts [((direction, read n :: Int), char) |
        [_fullMatch, n, direction, [char]] <- letters]

    groupFirsts = map concatSeconds . groupBy (\a b -> fst a == fst b)

    concatSeconds [] = error ""
    concatSeconds ((x, y) : xys) = (x, y : map snd xys)

    letters = script =~
        "\n\t+solutions\\[\"([0-9]+)-(across|down)-[^\"]+\"\\] = \"(.)\""
            :: [[String]]

parseHTML s = do
    let tags = parseTags s

        -- Extract clues.
        (clues : _) = partitions (isTagOpenNameId "div" "clues") tags
        (acrossList : downList : _) = partitions (isTagOpenName "ol") clues
        acrossClues = parseClueList $ head $ tagTree acrossList
        downClues = parseClueList $ head $ tagTree downList

        -- Extract solution positions in grid.
        (grid : _) = partitions (isTagOpenNameId "div" "grid") tags
        gridTree = head $ tagTree grid
        acrossPositions = parsePositions "across" gridTree
        downPositions = parsePositions "down" gridTree

        -- Extract solutions.
        (crossword : _) = partitions
            (isTagOpenName "div" <&> hasAttribValue "class" "crossword") tags
        crosswordTree = head $ tagTree crossword
        scriptText = concat $
            children (branchHasName "script") crosswordTree >>= text
        (acrossSolutions, downSolutions) = parseSolutions scriptText

    -- Sanity check.
    when (not (firstsEqual acrossClues acrossPositions) ||
          not (firstsEqual acrossClues acrossSolutions)) $
        error "inconsistency in across clues"
    when (not (firstsEqual downClues downPositions) ||
          not (firstsEqual downClues downSolutions)) $
        error "inconsistency in down clues"

    let -- Combine clues, solution positions and solutions.
        across = [(n, clue, pos, solution)
                    | (n, clue) <- acrossClues
                    | (_n, pos) <- acrossPositions
                    | (_n', solution) <- acrossSolutions]
        down = [(n, clue, pos, solution)
                    | (n, clue) <- downClues
                    | (_n, pos) <- downPositions
                    | (_n', solution) <- downSolutions]

        width = maximum [x + length solution
                    | (_, _, (x, _), solution) <- across]
        height = maximum [y + length solution
                    | (_, _, (_, y), solution) <- down]

    return (width, height, across, down)

    where firstsEqual a b = length a == length b &&
              (all (uncurry (==)) $ zip (map fst a) (map fst b))

flipGrid a = let (_, (bx, by)) = bounds a in
                 [a ! (y, x) | y <- [0..by], x <- [0..bx]]

formatGrid g = unlines (map formatRow rows)
    where (_, (w, h)) = bounds g
          rows = [[g ! (y, x) | x <- [0..w]] | y <- [0..h]]
          formatRow = (' ':) . intersperse ' '

packStr = BS.pack . map (fromIntegral . fromEnum)

main = do
    contents <- getContents
    (width, height, across, down) <- parseHTML contents
    let solved :: UArray (Int, Int) Char
        -- A dot represents a black square, a dash a white one.
        solved = listArray ((0, 0), (height - 1, width - 1)) (repeat '.')
                   // [((y, x + i), c) |
                       (_, _, (x, y), solution) <- across,
                       (i, c) <- zip [0..] solution]
                   // [((y + i, x), c) |
                       (_, _, (x, y), solution) <- down,
                       (i, c) <- zip [0..] solution]
        grid = amap (\c -> if c == '.' then '.' else '-') solved
        clues = map snd $
            sortBy (comparing fst) $
            map (\(n, clue, _, _) -> (n, clue)) (across ++ down)
    BS.putStr $ runPut $ putAcrossLite $
        Crossword (packStr "title") (packStr "author") (packStr "copyright")
            width height (map packStr clues)
            (packStr $ flipGrid solved) (packStr $ flipGrid grid)
