-- An Earley style parser for SCFGs {-# OPTIONS_GHC -Wall -fno-warn-orphans #-} module NLP.ScfgEarley where import Control.Arrow ((***),second) import Control.Exception (assert) import Control.Monad (guard, foldM) import Data.List (intercalate, delete) import qualified Data.Map as Map import Data.Maybe (maybeToList, mapMaybe, isNothing) import Data.Tree import Data.DList (DList(..)) import qualified Data.DList as DL import NLP.ChartParser import NLP.ScfgStuff import NLP.Tree (showBrackets) import NLP.ChartParserUtils (WordMap, toWordMap) import NLP.ChartDebugger (debugger) debug :: Scfg -> String -> String -> IO () debug gram s1 s2 = do debugger filts steps where filts = [ ("symbol", symbol) ] steps = takeWhile (not . agendaNull . agenda) $ iterate (chartStep config) initSt (config, initSt) = initStuff gram (s1,s2) {- go :: Scfg -> String -> String -> String go gram s1 s2 = intercalate "\n\n" . map drawPair . parseTrees gram wmap1 $ chartParse config initSt where drawPair (t1,t2) = showBrackets t1 ++ "\n" ++ showBrackets t2 (config, initSt) = initStuff gram (s1,s2) wmap1 = toWordMap . words $ s1 -} goS :: Scfg -> String -> String -> Bool goS gram s1 s2 = successful config $ chartParse config initSt where (config, initSt) = initStuff gram (s1,s2) goR :: Scfg -> String -> String -> String goR gram s1 s2 = unlines . map show . rulesUsed gram $ chartParse config initSt where (config, initSt) = initStuff gram (s1,s2) -- ---------------------------------------------------------------------- -- configuration of the chart parser -- ---------------------------------------------------------------------- data ChartItem = ChartItem { symbol :: String , beforeDot1 :: DList String , afterDot1 :: [String] , indices1 :: (Int,Int) , beforeDot2 :: DList String , afterDot2 :: [String] , mIndices2 :: Maybe (Int,Int) } deriving (Show, Eq, Ord) completed :: ChartItem -> Bool completed ci = null (afterDot2 ci) && null (afterDot1 ci) && isJust (mIndices2 ci) initStuff :: Scfg -> (String,String) -> (SimpleConfig ChartItem, SimpleStatus ChartItem) initStuff gram p_s = (config, st) where st = Status { agenda = axioms gram (fst p_wmap), chart = [] } config = toyConfig gram p_wmap p_wmap = toWordMapPair p_s toWordMapPair :: (String,String) -> (WordMap,WordMap) toWordMapPair = f *** f where f = toWordMap . words -- ---------------------------------------------------------------------- -- configuration of NLP.ChartParser -- ---------------------------------------------------------------------- toyConfig :: Scfg -> (WordMap,WordMap) -> SimpleConfig ChartItem toyConfig gram p_wmap@(wmap1,wmap2) = Config { inferenceRules = earleyInferenceRules gram p_wmap , isGoalItem = \s -> completed s && symbol s == start gram && indices1 s == (0, rmax wmap1 + 1) && mIndices2 s == Just (0, rmax wmap2 + 1) } where rmax = maximum . Map.keys -- ---------------------------------------------------------------------- -- axioms -- ---------------------------------------------------------------------- axioms :: Scfg -> [ChartItem] axioms gram = map mkAxiom1 rulez where rulez = filter (\x -> scfgLhs x == start gram) $ rules gram mkAxiom1 (ScfgRule l rs1 rs2) = ChartItem { symbol = l , beforeDot1 = DL.empty , beforeDot2 = DL.empty , afterDot1 = rs1 , afterDot2 = rs2 , indices1 = (0,0) , mIndices2 = Nothing } -- ---------------------------------------------------------------------- -- inference rules -- ---------------------------------------------------------------------- earleyInferenceRules :: Scfg -> (WordMap, WordMap) -> [ChartItem] -> ChartItem -> [ChartItem] earleyInferenceRules gram wmapp@(wmap1,_) chrt aItem = map snd results where -- if you uncomment showNewItem you can track how items are produced results = maybeCons (tagWith "SCAN" (scanR wmapp) aItem) . maybeCons (tagWith "S-PREDICT" (sPredictR gram wmap1) aItem) $ concat [ tagWith "PRED 1" (predictR1 gram) aItem , mapMaybe (tagWith "PRED2" predict2AC) cItems , mapMaybe (tagWith "PRED2" predict2CA) cItems , mapMaybe (tagWith "COMP" completeCA) cItems , mapMaybe (tagWith "COMP" completeAC) cItems ] -- showNewItem (l,i) = l ++ " " ++ show i completeCA c = completeR c aItem completeAC c = completeR aItem c predict2CA c = predictR2 gram c aItem predict2AC c = predictR2 gram aItem c tagWith x f = fmap (\y -> (x,y)) . f cItems = chrt -- this implements both scan rules scanR :: (WordMap, WordMap) -> ChartItem -> Maybe ChartItem scanR (wmap1,wmap2) aItem = case mIndices2 aItem of Nothing -> scanHelper wmap1 right1 afterDot1 newItem1 Just (_, right2) -> scanHelper wmap2 right2 afterDot2 newItem2 where (_,right1) = indices1 aItem scanHelper wmap0 right0 afterDot0 newItem0 = do lexW <- Map.lookup right0 wmap0 (itW,after) <- maybeHeadTail (afterDot0 aItem) guard (lexW == itW) return $ newItem0 itW after -- newItem1 w after = aItem { beforeDot1 = beforeDot1 aItem `DL.snoc` w , afterDot1 = after , indices1 = second (+1) $ indices1 aItem } newItem2 w after = aItem { beforeDot2 = beforeDot2 aItem `DL.snoc` w , afterDot2 = after , mIndices2 = second (+1) `fmap` mIndices2 aItem } -- seems ugly: only for use with start symbol sPredictR :: Scfg -> WordMap -> ChartItem -> Maybe ChartItem sPredictR gram wmap1 item = if isHalfComplete item then Just $ item { mIndices2 = Just (0,0) } else Nothing where isHalfComplete x = symbol x == startSym && indices1 x == (0,end) && null (afterDot1 x) && isNothing (mIndices2 x) startSym = start gram end = rmax wmap1 + 1 rmax = maximum . Map.keys predictR1 :: Scfg -> ChartItem -> [ChartItem] predictR1 gram bItem = case mIndices2 bItem of Nothing -> case afterDot1 bItem of [] -> [] (ic:_) -> let isNext x = scfgLhs x == ic in map toItem1 . filter isNext . rules $ gram Just _ -> [] where iRight1 = snd . indices1 $ bItem toItem1 (ScfgRule c cs1 cs2) = ChartItem { symbol = c , beforeDot1 = DL.empty , beforeDot2 = DL.empty , afterDot1 = cs1 , afterDot2 = cs2 , indices1 = (iRight1, iRight1) , mIndices2 = Nothing } predictR2 :: Scfg -> ChartItem -> ChartItem -> Maybe ChartItem predictR2 gram cItem bItem = do right2 <- snd `fmap` mIndices2 bItem ic <- listToMaybe $ afterDot2 bItem guard (symbol cItem == ic && null (afterDot1 cItem) && isNothing (mIndices2 cItem)) return $ cItem { mIndices2 = Just (right2,right2) } completeR :: ChartItem -> ChartItem -> Maybe ChartItem completeR bItem cItem = maybe completeR1 completeR2 $ mIndices2 bItem where completeR1= do do let (bLeft, bRight) = indices1 bItem (cLeft, cRight) = indices1 cItem guard (null (afterDot1 cItem) && bRight == cLeft) (bC,bRest) <- maybeHeadTail (afterDot1 bItem) guard (bC == symbol cItem) return $ bItem { beforeDot1 = beforeDot1 bItem `DL.snoc` bC , afterDot1 = bRest , indices1 = (bLeft, cRight) } completeR2 (bLeft, bRight) = do (cLeft, cRight) <- mIndices2 cItem guard (null (afterDot1 cItem) && null (afterDot2 cItem) && bRight == cLeft) (bC,bRest) <- maybeHeadTail (afterDot2 bItem) guard (bC == symbol cItem) return $ bItem { beforeDot2 = beforeDot2 bItem `DL.snoc` bC , afterDot2 = bRest , mIndices2 = Just (bLeft, cRight) } -- ---------------------------------------------------------------------- -- returning results -- ---------------------------------------------------------------------- {- -- I have a sneaking suspicion that there is a far simpler way to -- write this... intuition tells me that some of the checks here are -- just redundant code parseTrees :: Scfg -> WordMap -> Status ChartItem -> [Tree String] parseTrees (Scfg start _) wmap st = assert (all (\r -> length r == 1) result) -- exactly 1 $ map head result where -- showCChart = unlines . map show $ cchart result = helper start [] 0 (rmax + 1) lchart = map (uncurry mkLexItem) . Map.toList $ wmap cchart = filter completed . chart $ st rmax = maximum $ Map.keys wmap helper :: String -> [String] -> Int -> Int -> [[Tree String]] helper c cs l r = do -- trace (unwords $ ["at", show l, show r, c] ++ cs) $ guard True let isMatch = if null cs then isLastNode else isLeftNode ti <- filter isLastNode lchart ++ filter isMatch cchart -- trace (unwords $ ["--", show l, show r, c, show ti]) $ guard True let rti :: Int rti = right ti guard $ case cs of -- avoid left recursion loop [] -> rti == r (c2:_) -> any (\t -> left t == rti && symbol t == c2) cchart -- trace (unwords $ ["--", show l, show r, c, "kids"]) $ guard True kids <- case DL.toList (beforeDot ti) of [] -> return [] (kc:kcs) -> helper kc kcs l rti -- trace (unwords $ ["--", show l, show r, c, "siblings"]) $ guard True siblings <- case cs of [] -> return [] (c2:cs2) -> helper c2 cs2 rti r return $ Node c kids : siblings where isLeftNode t = left t == l && right t < r && symbol t == c isLastNode t = left t == l && right t == r && symbol t == c mkLexItem :: Int -> String -> ChartItem mkLexItem i w = ChartItem { symbol = w , beforeDot = DL.empty , afterDot = [] , left = i , right = i+1 } -} -- ---------------------------------------------------------------------- -- returning results -- ---------------------------------------------------------------------- maybeCons :: Maybe a -> [a] -> [a] maybeCons = maybe id (\x -> (x :)) maybeHeadTail :: [a] -> Maybe (a,[a]) maybeHeadTail [] = Nothing maybeHeadTail (x:xs) = Just (x,xs)