| 1 | ------------------------------------------------------------------------------- |
|---|
| 2 | -- | |
|---|
| 3 | -- Module : yaml2yeast |
|---|
| 4 | -- Copyright : (c) Oren Ben-Kiki 2007 |
|---|
| 5 | -- License : LGPL |
|---|
| 6 | -- |
|---|
| 7 | -- Maintainer : yaml-oren@ben-kiki.org |
|---|
| 8 | -- Stability : alpha |
|---|
| 9 | -- Portability : portable |
|---|
| 10 | -- |
|---|
| 11 | -- Convert an input YAML file to a YEAST (YAML Elaborate Atomic Syntax Tokens). |
|---|
| 12 | -- Command line options are: |
|---|
| 13 | -- |
|---|
| 14 | -- [@-h@ @-?@ @--help@] Prints short usage message |
|---|
| 15 | -- |
|---|
| 16 | -- [@-o@ @--output@ @file@] Specify output file (default is \"@-@\" for |
|---|
| 17 | -- stdout) |
|---|
| 18 | -- |
|---|
| 19 | -- [@-u@ @--unbuffered@] Disable @stdout@ buffering. |
|---|
| 20 | -- |
|---|
| 21 | -- [@-p@ @--production@ @production@] Specify the top production (default is |
|---|
| 22 | -- full YAML stream). |
|---|
| 23 | -- |
|---|
| 24 | -- [@-n@ @--n-value@ @n@] Specify the /n/ parameter of the production. |
|---|
| 25 | -- |
|---|
| 26 | -- [@-c@ @--c-value@ @c@] Specify the /c/ parameter of the production. |
|---|
| 27 | -- |
|---|
| 28 | -- [@-s@ @--s-value@ @s@] Specify the /s/ parameter of the production. |
|---|
| 29 | -- |
|---|
| 30 | -- [@-t@ @--t-value@ @t@] Specify the /t/ parameter of the production. |
|---|
| 31 | -- |
|---|
| 32 | -- [@input@] Specify the input file (default is \"@-@\" for stdin). |
|---|
| 33 | -- |
|---|
| 34 | -- The YEAST format is designed to allow trivial post-processing. Since YEAST |
|---|
| 35 | -- contains all the syntactical information contained in the original YAML |
|---|
| 36 | -- file, such processing can perform tasks which are impossible or difficult |
|---|
| 37 | -- using higher-level YAML parsers, such as pretty-printing. For example, the |
|---|
| 38 | -- @yeast2html@ program generates an HTML visualization of the syntactical |
|---|
| 39 | -- structure of the original YAML file. |
|---|
| 40 | -- |
|---|
| 41 | -- YEAST contains one line per token, where the first character is the token |
|---|
| 42 | -- code and the following characters are the input characters contained in the |
|---|
| 43 | -- token, if any. Non printable\/non ASCII characters (including line breaks) |
|---|
| 44 | -- are escaped using either \"@\\x@/XX/\", \"@\\u@/XXXX/\" or |
|---|
| 45 | -- \"@\\U@/XXXXXXXX/\" notation. Therefore YEAST files are restricted to 7-bit |
|---|
| 46 | -- printable ASCII, making them sufficiently human-readable for debugging |
|---|
| 47 | -- purposes. |
|---|
| 48 | -- |
|---|
| 49 | -- The YEAST token codes are: |
|---|
| 50 | -- |
|---|
| 51 | -- [@U@] BOM, contains \"@TF8@\", \"@TF16LE@\" or \"@TF16BE@\" |
|---|
| 52 | -- |
|---|
| 53 | -- [@T@] Contains presrved content text characters |
|---|
| 54 | -- |
|---|
| 55 | -- [@t@] Contains non-content (meta) text characters |
|---|
| 56 | -- |
|---|
| 57 | -- [@B@] Contains preserved content line break |
|---|
| 58 | -- |
|---|
| 59 | -- [@b@] Contains separation line break |
|---|
| 60 | -- |
|---|
| 61 | -- [@L@] Contains line break normalized to content line feed |
|---|
| 62 | -- |
|---|
| 63 | -- [@l@] Contains line break folded to content space |
|---|
| 64 | -- |
|---|
| 65 | -- [@I@] Contains character indicating structure |
|---|
| 66 | -- |
|---|
| 67 | -- [@w@] Contains separation white space |
|---|
| 68 | -- |
|---|
| 69 | -- [@i@] Contains indentation spaces |
|---|
| 70 | -- |
|---|
| 71 | -- [@K@] Document start marker |
|---|
| 72 | -- |
|---|
| 73 | -- [@k@] Document end marker |
|---|
| 74 | -- |
|---|
| 75 | -- [@E@] Begins escape sequence |
|---|
| 76 | -- |
|---|
| 77 | -- [@e@] Ends escape sequence |
|---|
| 78 | -- |
|---|
| 79 | -- [@C@] Begins comment |
|---|
| 80 | -- |
|---|
| 81 | -- [@c@] Ends comment |
|---|
| 82 | -- |
|---|
| 83 | -- [@D@] Begins directive |
|---|
| 84 | -- |
|---|
| 85 | -- [@d@] Ends directive |
|---|
| 86 | -- |
|---|
| 87 | -- [@G@] Begins tag |
|---|
| 88 | -- |
|---|
| 89 | -- [@g@] Ends tag |
|---|
| 90 | -- |
|---|
| 91 | -- [@H@] Begins tag handle |
|---|
| 92 | -- |
|---|
| 93 | -- [@h@] Ends tag handle |
|---|
| 94 | -- |
|---|
| 95 | -- [@A@] Begins anchor |
|---|
| 96 | -- |
|---|
| 97 | -- [@a@] Ends anchor |
|---|
| 98 | -- |
|---|
| 99 | -- [@P@] Begins node properties |
|---|
| 100 | -- |
|---|
| 101 | -- [@p@] Ends node properties |
|---|
| 102 | -- |
|---|
| 103 | -- [@R@] Begins alias (reference) |
|---|
| 104 | -- |
|---|
| 105 | -- [@r@] Ends alias (reference) |
|---|
| 106 | -- |
|---|
| 107 | -- [@S@] Begins scalar content |
|---|
| 108 | -- |
|---|
| 109 | -- [@s@] Ends scalar content |
|---|
| 110 | -- |
|---|
| 111 | -- [@Q@] Begins sequence content |
|---|
| 112 | -- |
|---|
| 113 | -- [@q@] Ends sequence content |
|---|
| 114 | -- |
|---|
| 115 | -- [@M@] Begins mapping content |
|---|
| 116 | -- |
|---|
| 117 | -- [@m@] Ends mapping content |
|---|
| 118 | -- |
|---|
| 119 | -- [@N@] Begins complete node |
|---|
| 120 | -- |
|---|
| 121 | -- [@n@] Ends complete node |
|---|
| 122 | -- |
|---|
| 123 | -- [@X@] Begins mapping key:value pair |
|---|
| 124 | -- |
|---|
| 125 | -- [@x@] Ends mapping key:value pair |
|---|
| 126 | -- |
|---|
| 127 | -- [@O@] Begins document |
|---|
| 128 | -- |
|---|
| 129 | -- [@o@] Ends document |
|---|
| 130 | -- |
|---|
| 131 | -- [@Y@] Begins YAML stream |
|---|
| 132 | -- |
|---|
| 133 | -- [@y@] Ends YAML stream |
|---|
| 134 | -- |
|---|
| 135 | -- [@!@] Parsing error at this point. |
|---|
| 136 | -- |
|---|
| 137 | -- [@-@] Unparsed text following error point. |
|---|
| 138 | -- |
|---|
| 139 | -- In addition, the following codes are used for testing partial productions |
|---|
| 140 | -- and do not appear when parsing a complete YAML stream: |
|---|
| 141 | -- |
|---|
| 142 | -- [@?@] Contains test characters otherwise unassigned |
|---|
| 143 | -- |
|---|
| 144 | -- [@\/@] Value of detected parameters |
|---|
| 145 | -- |
|---|
| 146 | ------------------------------------------------------------------------------- |
|---|
| 147 | module Main (main) where |
|---|
| 148 | |
|---|
| 149 | import Control.Monad |
|---|
| 150 | import qualified Data.ByteString.Lazy.Char8 as C |
|---|
| 151 | import System.Console.GetOpt |
|---|
| 152 | import System.Environment |
|---|
| 153 | import System.IO |
|---|
| 154 | import Text.Regex |
|---|
| 155 | import Text.Yaml.Reference |
|---|
| 156 | |
|---|
| 157 | |
|---|
| 158 | -- | Command line flag. |
|---|
| 159 | data Flag = Help -- ^ Request printing usage. |
|---|
| 160 | | Output String -- ^ Specify output file name. |
|---|
| 161 | | Production String -- ^ Specify start production name. |
|---|
| 162 | | Unbuffered -- ^ Disable @stdout@ bufferting. |
|---|
| 163 | | Following -- ^ Emit unparsed text following an error. |
|---|
| 164 | | ParamN (Maybe Int) -- ^ Specify $n$ parameter. |
|---|
| 165 | | ParamT (Maybe Chomp) -- ^ Specify $t$ parameter. |
|---|
| 166 | | ParamC (Maybe Context) -- ^ Specify $c$ parameter. |
|---|
| 167 | | Input String -- ^ Specify input file name. |
|---|
| 168 | deriving Show |
|---|
| 169 | |
|---|
| 170 | -- | Command line options. |
|---|
| 171 | optionDescriptions :: [OptDescr Flag] |
|---|
| 172 | optionDescriptions = [ |
|---|
| 173 | Option ['h', '?'] ["help"] |
|---|
| 174 | (NoArg Help) |
|---|
| 175 | "print usage and exit", |
|---|
| 176 | Option ['o'] ["output"] |
|---|
| 177 | (ReqArg Output "file") |
|---|
| 178 | "output file", |
|---|
| 179 | Option ['u'] ["unbuffered"] |
|---|
| 180 | (NoArg Unbuffered) |
|---|
| 181 | "disable stdout buffering", |
|---|
| 182 | Option ['f'] ["following"] |
|---|
| 183 | (NoArg Following) |
|---|
| 184 | "emit unparsed input following an error", |
|---|
| 185 | Option ['p'] ["production"] |
|---|
| 186 | (ReqArg Production "production") |
|---|
| 187 | "top production", |
|---|
| 188 | Option ['n'] ["n-value"] |
|---|
| 189 | (ReqArg (ParamN . readParameter) "indentation") |
|---|
| 190 | "n parameter value (indentation)", |
|---|
| 191 | Option ['c'] ["context"] |
|---|
| 192 | (ReqArg (ParamC . readParameter) "context") |
|---|
| 193 | "c parameter value (context)", |
|---|
| 194 | Option ['t'] ["chomp"] |
|---|
| 195 | (ReqArg (ParamT . readParameter) "chomp") |
|---|
| 196 | "t parameter value (chomp)" |
|---|
| 197 | ] |
|---|
| 198 | |
|---|
| 199 | -- | @testParameter parameter@ converts the /parameter/ to a typed value. Patch |
|---|
| 200 | -- the @-@ characters in the /c/ parameter into @_@ to make it possible for the |
|---|
| 201 | -- built-in lexer to handle them. |
|---|
| 202 | readParameter :: (Read t) => String -> Maybe t |
|---|
| 203 | readParameter text = Just $ read $ subRegex (mkRegex "([a-z])-") text "\\1_" |
|---|
| 204 | |
|---|
| 205 | -- | @usage@ returns the usage string for the program. |
|---|
| 206 | usage :: String |
|---|
| 207 | usage = usageInfo "yamlSyntax: [options] [file]" optionDescriptions |
|---|
| 208 | |
|---|
| 209 | -- | @collectFlags args@ converts the command line /args/ to list of 'Flag'. |
|---|
| 210 | collectFlags :: [String] -> IO [Flag] |
|---|
| 211 | collectFlags args = |
|---|
| 212 | case getOpt Permute optionDescriptions args of |
|---|
| 213 | (_, _, [error:errors]) -> ioError $ userError $ (concat [error:errors]) ++ usage |
|---|
| 214 | (flags, [], []) -> do return flags |
|---|
| 215 | (flags, [arg], []) -> do return $ [Input arg] ++ flags |
|---|
| 216 | (flags, _, []) -> ioError $ userError "more than one input file" |
|---|
| 217 | |
|---|
| 218 | -- | Options controlling program behavior |
|---|
| 219 | data Options = Options { |
|---|
| 220 | oToHelp :: Bool, -- ^ Whether to just print the usage. |
|---|
| 221 | oToUnbuffer :: Bool, -- ^ Whether to disable @stdout@ buffering. |
|---|
| 222 | oWithFollowing :: Bool, -- ^ Whether to emit unparsed text following an error. |
|---|
| 223 | oInput :: String, -- ^ Name of input file ("-": @stdin@). |
|---|
| 224 | oOutput :: String, -- ^ Name of output file ("-": @stdout@). |
|---|
| 225 | oProduction :: String, -- ^ Name of start production. |
|---|
| 226 | oN :: Maybe Int, -- ^ N parameter, if any. |
|---|
| 227 | oC :: Maybe Context, -- ^ C parameter, if any. |
|---|
| 228 | oT :: Maybe Chomp -- ^ T parameter, if any. |
|---|
| 229 | } |
|---|
| 230 | |
|---|
| 231 | -- | Default options if no flags are specified. |
|---|
| 232 | defaultOptions = Options { |
|---|
| 233 | oToHelp = False, |
|---|
| 234 | oToUnbuffer = False, |
|---|
| 235 | oWithFollowing = False, |
|---|
| 236 | oInput = "-", |
|---|
| 237 | oOutput = "-", |
|---|
| 238 | oProduction = "l-yaml-stream", |
|---|
| 239 | oN = Nothing, |
|---|
| 240 | oC = Nothing, |
|---|
| 241 | oT = Nothing |
|---|
| 242 | } |
|---|
| 243 | |
|---|
| 244 | -- | @applyFlags flags options@ applies the specified /flags/ to the /options/. |
|---|
| 245 | applyFlags :: [Flag] -> Options -> IO Options |
|---|
| 246 | applyFlags [] options = return options |
|---|
| 247 | applyFlags (flag:flags) options = |
|---|
| 248 | case flag of |
|---|
| 249 | Help -> applyFlags flags options { oToHelp = True } |
|---|
| 250 | Unbuffered -> applyFlags flags options { oToUnbuffer = True } |
|---|
| 251 | Following -> applyFlags flags options { oWithFollowing = True } |
|---|
| 252 | Output name -> applyFlags flags options { oOutput = name } |
|---|
| 253 | Input name -> applyFlags flags options { oInput = name } |
|---|
| 254 | ParamN value -> applyFlags flags options { oN = value } |
|---|
| 255 | ParamC value -> applyFlags flags options { oC = value } |
|---|
| 256 | ParamT value -> applyFlags flags options { oT = value } |
|---|
| 257 | Production name -> applyFlags flags options { oProduction = name } |
|---|
| 258 | |
|---|
| 259 | -- | @fromFile name@ returns the contents of the specified input file called |
|---|
| 260 | -- /name/ (may be "-" for @stdin@). |
|---|
| 261 | fromFile :: String -> IO C.ByteString |
|---|
| 262 | fromFile name = |
|---|
| 263 | case name of |
|---|
| 264 | "-" -> C.getContents |
|---|
| 265 | path -> C.readFile path |
|---|
| 266 | |
|---|
| 267 | -- | @intoFile name text@ writes the /text/ into the specified output file |
|---|
| 268 | -- called /name/ (may be "-" for @stdout@). |
|---|
| 269 | intoFile :: String -> String -> IO () |
|---|
| 270 | intoFile name text = |
|---|
| 271 | case name of |
|---|
| 272 | "-" -> putStr text |
|---|
| 273 | path -> writeFile path $ text |
|---|
| 274 | |
|---|
| 275 | -- | @runWith options@ runs the program with the specified /options/. |
|---|
| 276 | runWith :: Options -> IO () |
|---|
| 277 | runWith options = |
|---|
| 278 | do let resolved = case ((oN options), (oC options), (oT options)) of |
|---|
| 279 | (Nothing, Nothing, Nothing) -> tokenizer (oProduction options) |
|---|
| 280 | (Just n, Nothing, Nothing) -> tokenizerWithN (oProduction options) n |
|---|
| 281 | (Nothing, Just c, Nothing) -> tokenizerWithC (oProduction options) c |
|---|
| 282 | (Nothing, Nothing, Just t) -> tokenizerWithT (oProduction options) t |
|---|
| 283 | (Just n, Just c, Nothing) -> tokenizerWithNC (oProduction options) n c |
|---|
| 284 | (Just n, Nothing, Just t) -> tokenizerWithNT (oProduction options) n t |
|---|
| 285 | _ -> error "No production with this combination of parameters" |
|---|
| 286 | case resolved of |
|---|
| 287 | Nothing -> error $ "No production " ++ (oProduction options) ++ " or it doesn't take this combination of parameters" |
|---|
| 288 | Just parser -> do text <- fromFile (oInput options) |
|---|
| 289 | let tokens = parser (oInput options) text (oWithFollowing options) |
|---|
| 290 | intoFile (oOutput options) (showTokens tokens) |
|---|
| 291 | |
|---|
| 292 | -- | @main@ converts an input YAML file to YEAST tokens. |
|---|
| 293 | main :: IO () |
|---|
| 294 | main = do args <- getArgs |
|---|
| 295 | flags <- collectFlags args |
|---|
| 296 | options <- applyFlags flags defaultOptions |
|---|
| 297 | when (oToUnbuffer options) $ hSetBuffering stdout NoBuffering |
|---|
| 298 | if oToHelp options |
|---|
| 299 | then putStrLn usage |
|---|
| 300 | else runWith options |
|---|