-- Copyright (C) 2008 Eric Kow -- This is to be BSD3 licensed module NLP.ToyCky where import Control.Applicative import Control.Monad (guard) import Data.List (intercalate) import Data.Maybe (mapMaybe, catMaybes) import Data.Tree import NLP.ChartParser import NLP.BinarisedCfg go :: BinarisedCfg -> String -> String go cfg s = intercalate "\n\n" . map drawTree . parseTrees cfg s $ parse cfg s -- ---------------------------------------------------------------------- -- configuration of the chart parser -- ---------------------------------------------------------------------- data ToyItem = ToyItem { symbol :: String , left :: Int , right :: Int } deriving (Show, Eq) parse :: BinarisedCfg -> String -> SimpleStatus ToyItem parse cfg s = chartParse (toyConfig cfg) initialSt where initialSt = Status { agenda = axioms cfg (words s) , chart = [] } -- ---------------------------------------------------------------------- -- configuration of NLP.ChartParser -- ---------------------------------------------------------------------- toyConfig :: BinarisedCfg -> SimpleConfig ToyItem toyConfig gram = Config { inferenceRules = ckyInferenceRules gram , isGoalItem = \s -> symbol s == start gram } -- ---------------------------------------------------------------------- -- axioms -- ---------------------------------------------------------------------- axioms :: BinarisedCfg -> [String] -> [ToyItem] axioms cfg ss = concat $ zipWith (toAxiom cfg) ss [0..] toAxiom :: BinarisedCfg -> String -> Int -> [ToyItem] toAxiom cfg w i = mkItem w : [ mkItem c | UnaryRule c w2 <- rules cfg, w == w2 ] where wItem = mkItem w -- this isn't strictly necessary, but it's convenient for recovering parse trees mkItem x = ToyItem x i (i+1) -- ---------------------------------------------------------------------- -- inference rules -- ---------------------------------------------------------------------- ckyInferenceRules :: BinarisedCfg -> [ToyItem] -> ToyItem -> [ToyItem] ckyInferenceRules (BinarisedCfg _ rs) chart aItem = catMaybes (completeAC <$> rs <*> chart) ++ catMaybes (completeCA <$> rs <*> chart) where completeAC r c = complete r aItem c completeCA r c = complete r c aItem complete :: BinarisedCfgRule -> ToyItem -> ToyItem -> Maybe ToyItem complete (BinaryRule ra (rb,rc)) bItem cItem = if bsym == rb && csym == rc && bmiddle == cmiddle then Just $ ToyItem ra bleft cright else Nothing where bsym = symbol bItem csym = symbol cItem bleft = left bItem bmiddle = right bItem cmiddle = left cItem cright = right cItem complete _ _ _ = Nothing -- ---------------------------------------------------------------------- -- returning results -- ---------------------------------------------------------------------- -- I have a sneaking suspicion that there is a far simpler way to -- write this... intuition tells me that some of the checks here are -- just redundant code parseTrees :: BinarisedCfg -> String -> SimpleStatus ToyItem -> [Tree String] parseTrees (BinarisedCfg start crules) str st = helper start 0 right where chrt = chart st right = length (words str) helper :: String -> Int -> Int -> [Tree String] helper c l r = do rule <- filter (\x -> lhs x == c) crules case rule of UnaryRule _ lx -> do guard $ any (matches lx l r) chrt return $ Node c [Node lx []] BinaryRule _ (lc,rc) -> do (ToyItem _ _ m) <- filter (lmatches lc l) chrt guard $ any (matches rc m r) chrt -- avoid left recursion loop lKid <- helper lc l m rKid <- helper rc m r return $ Node c [lKid, rKid] where lmatches c1 l1 t@(ToyItem c2 l2 _) = l2 == l1 && c2 == c1 matches c1 l1 r1 t@(ToyItem c2 l2 r2) = l2 == l1 && c2 == c1 && r2 == r1