Syslog Parserを haskellで書いてみる。
過去にBisonのErlang版 Yeccを使ってパーサーを書いたことがあるのですが、作成数ヶ月後にソースコードを読んでみても何をしているのかさっぱり忘れてしまい苦労したことがあります。
おまけにデバッグ大変ですし・・・・
今回はHaskell のattoparsecライブラリを使ってRFC3164のパーサーを書いてみました。
Real world haskellのChapter 16. Using Parsecではattoparsecのベースとなったparsecの説明があるので参考になると思います。
Real world haskellのChapter 27. Sockets and Syslogでもsyslog serverの実装方法が取り上げていますが、ただ単にsyslog serverが受信したデータをputStrLnで表示しているだけなので、少々素っ気ないです。
RFC3164詳細はこちら
http://www.ietf.org/rfc/rfc3164.txt
まずはReal world haskellの- file: ch27/SyslogTypes.hs を少し変更
module SyslogPriority ( Severity , Facility , codeOfFac , facOfCode , codeOfPri , priOfCode ) where data Severity = DEBUG | INFO | NOTICE | WARNING | ERROR | CRITICAL | ALART | EMERGENCY deriving (Show, Eq, Read, Enum, Ord) priToCode :: [(Severity, Int)] priToCode = [ (DEBUG, 7), (INFO, 6), (NOTICE, 5), (WARNING, 4), (ERROR, 3), (CRITICAL, 2), (ALART, 1), (EMERGENCY, 0) ] codeOfPri :: Severity -> Int codeOfPri p = case lookup p priToCode of Just x -> x _ -> error $ "Internal error in priOfCode" priOfCode :: Int -> Severity priOfCode p = case lookup p $ codeTo priToCode of Just x -> x _ -> error $ "Internal error in codeOfPri" data Facility = KERN | USER | MAIL | DAEMON | AUTH | SYSLOG | LPR | NEWS | UUCP | CRON | AUTHPRIV | FTP | NTP | LOGAUDIT | LOGWARNING | CLOCKDAEMON | LOCAL0 | LOCAL1 | LOCAL2 | LOCAL3 | LOCAL4 | LOCAL5 | LOCAL6 | LOCAL7 deriving (Show, Eq, Ord) facToCode :: [(Facility, Int)] facToCode = [ (KERN, 0), (USER, 1), (MAIL, 2), (DAEMON, 3), (AUTH, 4), (SYSLOG, 5), (LPR, 6), (NEWS, 7), (UUCP, 8), (CRON, 9), (AUTHPRIV, 10), (FTP, 11), (NTP, 12), (LOGAUDIT, 13), (LOGWARNING, 14), (CLOCKDAEMON, 15), (LOCAL0, 16), (LOCAL1, 17), (LOCAL2, 18), (LOCAL3, 19), (LOCAL4, 20), (LOCAL5, 21), (LOCAL6, 22), (LOCAL7, 23) ] codeOfFac :: Facility -> Int codeOfFac f = case lookup f facToCode of Just x -> x _ -> error $ "Internal error in codeOfFac" facOfCode :: Int -> Facility facOfCode f = case lookup f $ codeTo facToCode of Just x -> x _ -> error $ "Internal error in facOfCode" codeTo :: [(x, y)] -> [(y, x)] codeTo = map (\(x, y) -> (y, x))
そしてパーサー RFC3164.hs
{-# LANGUAGE OverloadedStrings #-} module RFC3164 ( parseRFC3164 , RFC3164(..) , PRI(..) , Header(..) , Msg(..) ) where import SyslogPriority import Control.Applicative import Data.Attoparsec as P import qualified Data.Attoparsec.Char8 as P8 import Data.Attoparsec.Char8 (char8, endOfLine) import qualified Data.ByteString.Char8 as B hiding (map) {- http://www.ietf.org/rfc/rfc3164.txt -} -- SYSLOG-MSG = PRI SP HEADER SP MSG data RFC3164 = RFC3164 { pri :: PRI , header :: Header , msg :: Msg } deriving (Eq, Ord, Show) -- | parser for rfc 3164 -- >>> :set -XOverloadedStrings -- >>> parseTest parseRFC3164 "<34>Oct 11 22:14:15 mymachine su: 'su root' failed for lonvick on /dev/pts/8\n " -- Done " " RFC3164 {pri = PRI {severity = CRITICAL, facility = AUTH}, header = Header {timestamp = "Oct 11 22:14:15", hostName = "mymachine"}, msg = Msg {tag = Just "su", procId = Nothing, content = " 'su root' failed for lonvick on /dev/pts/8"}} -- -- >>> parseTest parseRFC3164 "<0>Oct 22 10:52:12 scapegoat apache[2321]: 10.1.2.3 sched[0]: That's All Folks!\n " -- Done " " RFC3164 {pri = PRI {severity = EMERGENCY, facility = KERN}, header = Header {timestamp = "Oct 22 10:52:12", hostName = "scapegoat"}, msg = Msg {tag = Just "apache", procId = Just 2321, content = " 10.1.2.3 sched[0]: That's All Folks!"}} -- parseRFC3164 :: Parser RFC3164 parseRFC3164 = do pri' <- parsePRI header' <- parseHeader msg' <- parseMsg return $ RFC3164 pri' header' msg' -- PRI = '<' INT '>' -- INT = 1*3 DIGIT ; range 0 .. 191 -- data PRI = PRI { severity :: Severity , facility :: Facility } deriving (Eq, Ord, Show) -- | facrity and priority check -- -- Example: -- -- >>> parseTest parsePRI "<0> " -- Done " " PRI {severity = EMERGENCY, facility = KERN} -- -- >>> parseTest parsePRI "<191> " -- Done " " PRI {severity = DEBUG, facility = LOCAL7} -- parsePRI :: Parser PRI parsePRI = do pri' <- char8 '<' *> P8.decimal <* char8 '>' return $ PRI (parseSeverity pri') (parseFac pri') parseFac :: Int -> Facility parseFac pri' = facOfCode $ pri' `div` 8 parseSeverity :: Int -> Severity parseSeverity pri' = priOfCode $ pri' `mod` 8 -- HEADER = TIMESTAMP SP HOSTNAME -- TIMESTAMP = MMM SP DD SP HH COL MM COL SS -- MMM = 3PRINTUSASCII ; Jan Feb Mar Arp May Jun Jul Aug Sep Oct Nov Dev -- DD = 2DIGIT ; <SP>1-28, <SP>1-29, <SP>1-30, <SP>1-31 -- HH = 2DIGIT ; 00-23 -- MM = 2DIGIT ; 00-59 -- HOSTNAME = IPv4 | IPv6 | PRINTUSASCII/DIGIT -- IPv4 = 3NONZERO-DIGIT DOT 3NONZERO-DIGIT DOT 3NONZERO-DIGIT -- IPv6 = data Header = Header { timestamp :: !B.ByteString , hostName :: !B.ByteString } deriving (Show, Eq, Ord) -- | header parser check -- -- example: -- -- >>> parseTest parseHeader "Feb 12 19:27:01 hostname " -- Done "" Header {timestamp = "Feb 12 19:27:01", hostName = "hostname"} -- -- >>> parseTest parseHeader "Feb 2 19:27:01 hostname " -- Done "" Header {timestamp = "Feb 2 19:27:01", hostName = "hostname"} -- parseHeader :: Parser Header parseHeader = do time <- P8.take 15 <* char8 ' ' host <- takeTill P8.isHorizontalSpace <* char8 ' ' return $ Header time host data Msg = Msg { tag :: Maybe B.ByteString , procId :: Maybe Int , content :: !B.ByteString } deriving (Eq, Ord, Show) --MSG = TAG COL CONTENT --TAG = 1*32 PROCNAME [ PID ]: --PROCNAME = PRINTUSASCII / DIGIT --TAGID = DIGIT --CONTENT = PRINTUSASCII --SP = %d32 --PRINTUSASCII = %d33-126 --NONZERO-DIGIT = %d49-57 --DIGIT = %d48 / NONZERO-DIGIT --DOT = %46 --COL = %58 --[ = %91 --] = %93 -- | msg parser check -- -- example: -- -- >>> parseTest parseMsg "app[12]: message starting \n" -- "12]: message starting \n" -- Done "" Msg {tag = Just "app", procId = Just 12, content = " message starting "} -- -- >>> parseTest parseMsg "app: message starting \n" --"message starting \n" -- Done "" Msg {tag = Just "app", procId = Nothing, content = " message starting "} -- -- parseMsg :: Parser Msg parseMsg = do tag' <- Just <$> try (takeTill (\c -> inClass "[:" c)) <|> pure Nothing <$> char8 '[' pid' <- Just <$> try (char8 '[' *> P8.decimal <* char8 ']' <* char8 ':') <|> pure Nothing <$> char8 ':' content' <- takeTill P8.isEndOfLine <* endOfLine return $ Msg tag' pid' content'
doctest(ghc7.4.2)で動作を確認
※doctestがない場合は "cabal install doctest"でインストールください
$ doctest RFC3164.hs There are 4 tests, with 9 total interactions. Examples: 4 Tried: 4 Errors: 0 Failures: 0
network-conduitとattoparsec-conduitを参考にsyslog server(collector)を実装してみるとおもしろいかもしれません。