module Algorithms where import Prelude hiding (null, lookup) import DataSet (empty, DataSet (..), mapWithKey, transform, fromList, (!), lookup, MetricPrime, unionWith, intersectionWith, null, size, toList, delete, Metric, handleLookups) import qualified DataSet as D import Data.Map (Map) import Data.Maybe import qualified Data.Foldable as F import Data.List (sortBy) {- NOTE - Do we want to return '0' when either entry does not exist, or do we want to return Nothing. For the moment I'm commenting out these functions & replacing them with ones that return Maybe. FEEL FREE TO TELL ME I'M WRONG - wchogg euclidean :: (Ord a, Ord b, Floating c) => a -> a -> DataSet a b c -> c euclidean = undefined pearson :: (Ord a, Ord b, Floating c) => a -> a -> DataSet a b c -> c pearson = undefined -} euclidean :: (Ord a, Ord b, Floating c) => Metric a b c euclidean = handleLookups euclidean' pearson :: (Ord a, Ord b, Floating c) => Metric a b c pearson = handleLookups pearson' euclidean' :: (Ord b, Floating c) => MetricPrime b c euclidean' m m' = if null m'' then Nothing else Just $ 1/(1+weight) where m'' = intersectionWith diff m m' diff v v' = (v-v')^2 weight = F.foldr (+) 0 m'' -- slightly modified version of one written by krassik pearson' :: (Ord b, Floating c) => MetricPrime b c pearson' p p' = if null p'' || (den == 0) then Nothing else Just r where p'' = intersectionWith (\a b->(a,b)) p p' n = fromIntegral $ size p'' (sum1,sum2) = F.foldr (pair3 (+)) (0,0) p'' (sum1Sq,sum2Sq) = F.foldr (pair3 (+)) (0,0) $ D.map (pair (^2)) p'' pSum = F.foldr (+) 0 $ D.map ( \(a,b)->a*b ) p'' num = pSum - (sum1*sum2/n) den= sqrt((sum1Sq-(sum1^2)/n)*(sum2Sq-(sum2^2)/n)) r = num /den {- topMatches :: a -> Int -> (a -> a -> DataSet a b c) -> [(a, c)] topMatches = undefined I propose taking out the Int argument & just returning the list because if we're being sufficiently lazy then the user will be able to just take n of the result & that's all that should be calculated. - wchogg -} topMatches :: (Ord a, Ord c) => a -> MetricPrime b c -> DataSet a b c -> [(a,c)] topMatches key metric (DataSet set) = reverse . sortBy f $ case lookup key set of Nothing -> [] Just m -> (fmap . fmap) fromJust . filter ((/= Nothing) . snd) . toList . fmap (metric m) $ delete key set where f (a,b) (c,d) = compare b d getRecommendations :: (Ord a,Ord c, Ord b, Floating c) => a -> MetricPrime b c -> DataSet a b c -> [(b, c)] getRecommendations key met (DataSet set)= reverse . sortBy f $ case lookup key set of Nothing -> [] Just m -> toList . fmap (\(a,b) -> a*b) . items $ m where set' = delete key $ set simMap m = fmap (fromMaybe 0 . met m) set' weighted m = mapWithKey (\k m' -> fmap (\v -> (v*(simMap m ! k), (simMap m !k))) m') set' items m = F.foldr (unionWith (pair3 (+))) empty . weighted $ m f (a,b) (c,d) = compare b d calculateSimilarItems :: (Ord a, Ord b, Ord c, Floating c) => Int -> DataSet a b c -> DataSet b b c calculateSimilarItems n d = DataSet . fmap fromList $ items' where d'@(DataSet items) = transform d items' = mapWithKey (\key _ -> take n $ topMatches key euclidean' d') items getRecommendedItems :: DataSet a b c -> DataSet b b c -> a -> [(b, c)] getRecommendedItems = undefined pair :: (a -> b) -> (a, a) -> (b, b) pair f (a,b) = (f a, f b) pair3 :: (a -> b -> c) -> (a, a) -> (b, b) -> (c, c) pair3 f (a,b) (c,d) = (f a c, f b d)