diff --git a/packages/rssfeeds/Folders.hs b/packages/rssfeeds/Folders.hs new file mode 100644 index 00000000..da59682a --- /dev/null +++ b/packages/rssfeeds/Folders.hs @@ -0,0 +1,82 @@ +module Main (main) where + +import Data.List.Extra qualified as List +import Data.String.Interpolate (i) +import Data.Text qualified as Text +import Data.Time (UTCTime) +import Data.Time qualified as Time +import Network.Wreq qualified as Wreq +import Relude +import Relude.Extra ((^.)) +import Text.Atom.Feed qualified as Feed +import Text.Atom.Feed.Export qualified as Feed +import Text.HTML.TagSoup qualified as TagSoup +import Witch (into) + +extractItem :: [TagSoup.Tag Text] -> Maybe (Text, Text, UTCTime) +extractItem x = case ( List.firstJust extractFolder . filter (TagSoup.isTagOpenName "a") $ x + , TagSoup.maybeTagText <=< (find TagSoup.isTagText . dropWhile (\tag -> not (TagSoup.isTagOpenName "td" tag && "date" == TagSoup.fromAttrib "class" tag))) $ x + ) of + (Just (link, title), Just date_str) | Just date <- Time.parseTimeM True Time.defaultTimeLocale "%Y-%b-%d %H:%M" (toString date_str) -> Just (link, title, date) + _ -> Nothing + +extractIndex :: Text -> IO [(Text, Text, UTCTime)] +extractIndex = \url -> + mapMaybe extractItem + . List.split (TagSoup.isTagOpenName "tr") + . dropWhile (not . TagSoup.isTagOpenName "tbody") + . TagSoup.parseTags + . decodeUtf8 + . (^. Wreq.responseBody) + <$> Wreq.get [i|#{url}|] + +collectEntries :: Text -> (Text, Text, UTCTime) -> IO [(Feed.Entry)] +collectEntries = \url (link, title, date) -> + if Text.isSuffixOf "/" link + then do + entries <- extractIndex [i|#{url}#{link}|] + join <$> forM entries (collectEntries [i|#{url}#{link}|]) + else + pure + [ ( ( Feed.nullEntry + (Text.dropAround (== '/') link) + (Feed.TextString title) + (timestamp date) + ) + { Feed.entryLinks = [Feed.nullLink [i|#{url}#{link}|]] + } + ) + ] + +main :: IO () +main = do + [root_dir] <- getArgs + folders <- extractIndex (toText root_dir) + feeds <- forM folders \x@(link, title, date) -> do + let path = Text.dropAround (== '/') link + entries <- collectEntries [i|#{root_dir}/|] x + let num = length entries + emptyFeed = + Feed.nullFeed + [i|#{path}-#{timestamp date}|] + (Feed.TextString title) + (timestamp date) + feed = fromMaybe (error "Could not produce feed.") $ Feed.textFeed emptyFeed{Feed.feedEntries = entries} + writeFileLText [i|#{title}.xml|] feed + pure [i|
  • #{title} (#{num} entries, newest from #{timestamp date})
  • |] + writeFileText [i|index.html|] $ + Text.unlines $ + ["", "", "", "Available RSS Feeds", "", "", "

    Available RSS Feeds

    ", "", "", ""] + +timestamp :: UTCTime -> Text +timestamp = into . Time.formatTime Time.defaultTimeLocale "%Y-%m-%d %H:%M" + +extractFolder :: TagSoup.Tag Text -> Maybe (Text, Text) +extractFolder = \tag -> + let + title = TagSoup.fromAttrib "title" tag + href = TagSoup.fromAttrib "href" tag + in + if Text.null title || Text.null href + then Nothing + else Just (href, title) diff --git a/packages/rssfeeds/default.nix b/packages/rssfeeds/default.nix index 53238445..a25f8fee 100644 --- a/packages/rssfeeds/default.nix +++ b/packages/rssfeeds/default.nix @@ -20,6 +20,7 @@ text, time, witch, + wreq, }: mkDerivation { pname = "rssfeeds"; @@ -47,6 +48,7 @@ mkDerivation { text time witch + wreq ]; license = "unknown"; } diff --git a/packages/rssfeeds/hie.yaml b/packages/rssfeeds/hie.yaml index f0c7014d..daf97e6f 100644 --- a/packages/rssfeeds/hie.yaml +++ b/packages/rssfeeds/hie.yaml @@ -1,2 +1,2 @@ cradle: - cabal: \ No newline at end of file + cabal: {} diff --git a/packages/rssfeeds/rssfeeds.cabal b/packages/rssfeeds/rssfeeds.cabal index 5f65e1a6..34ea3131 100644 --- a/packages/rssfeeds/rssfeeds.cabal +++ b/packages/rssfeeds/rssfeeds.cabal @@ -74,6 +74,25 @@ executable mail2rss default-language: GHC2021 +executable folders2rss + import: common-options + main-is: Folders.hs + build-depends: + , base + , containers + , extra + , feed >=1.3.0.0 + , filepattern + , relude + , string-interpolate + , tagsoup + , text + , time + , witch + , wreq + + default-language: GHC2021 + executable mastodon2rss import: common-options main-is: Mastodon.hs