mirror of
https://github.com/lettier/parsing-with-haskell-parser-combinators.git
synced 2024-10-03 17:08:10 +03:00
Adds initial files.
This commit is contained in:
commit
d2ddec4cdd
9
.gitignore
vendored
Normal file
9
.gitignore
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
*stack-work*
|
||||
*cabal*sandbox*
|
||||
*dist*
|
||||
*tmp*
|
||||
*blend1
|
||||
*blend2
|
||||
*blend3
|
||||
*blend4
|
||||
*blend5
|
29
LICENSE
Normal file
29
LICENSE
Normal file
@ -0,0 +1,29 @@
|
||||
BSD 3-Clause License
|
||||
|
||||
Copyright (c) 2019, David Lettier
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
23
docs/_build-docs.sh
Executable file
23
docs/_build-docs.sh
Executable file
@ -0,0 +1,23 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
SCRIPT_PATH="$(cd "$(dirname "$0")"; pwd -P)"
|
||||
MAIN_TITLE="Parsing With Haskell Parser Combinators"
|
||||
DESCRIPTION="Need to parse something? Never heard of a parser combinator? \
|
||||
Looking to learn some Haskell? \
|
||||
Awesome! This is everything you'll need to get up and parsing with Haskell parser combinators. \
|
||||
From here you can try tackling esoteric data serialization formats, compiler front ends, domain specific languages—you name it!"
|
||||
REPO_URL="https://github.com/lettier/parsing-with-haskell-parser-combinators"
|
||||
AUTHOR="David Lettier"
|
||||
CSS="style.css"
|
||||
|
||||
$PANDOC \
|
||||
-f gfm \
|
||||
-t html5 \
|
||||
--highlight-style=breezedark \
|
||||
--template=$SCRIPT_PATH/_template.html5 \
|
||||
$SCRIPT_PATH/../README.md \
|
||||
--metadata pagetitle="$MAIN_TITLE" \
|
||||
--metadata author-meta="$AUTHOR" \
|
||||
--metadata description="$DESCRIPTION" \
|
||||
--metadata css=$CSS \
|
||||
-o "$SCRIPT_PATH/index.html"
|
79
docs/_template.html5
Normal file
79
docs/_template.html5
Normal file
@ -0,0 +1,79 @@
|
||||
<!DOCTYPE html>
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" lang="$lang$" xml:lang="$lang$"$if(dir)$ dir="$dir$"$endif$>
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
|
||||
<meta name="description" content="$description$" />
|
||||
<meta property="og:title" content="$if(title-prefix)$$title-prefix$ – $endif$$pagetitle$" />
|
||||
<meta property="og:description" content="$description$" />
|
||||
<meta property="og:image" content="https://i.imgur.com/kw2kYzf.jpg" />
|
||||
<meta name="twitter:title" content="$if(title-prefix)$$title-prefix$ – $endif$$pagetitle$" />
|
||||
<meta name="twitter:description" content="$description$" />
|
||||
<meta name="twitter:image" content="https://i.imgur.com/kw2kYzf.jpg" />
|
||||
<meta name="twitter:card" content="summary_large_image" />
|
||||
$for(author-meta)$
|
||||
<meta name="author" content="$author-meta$" />
|
||||
$endfor$
|
||||
$if(date-meta)$
|
||||
<meta name="dcterms.date" content="$date-meta$" />
|
||||
$endif$
|
||||
$if(keywords)$
|
||||
<meta name="keywords" content="$for(keywords)$$keywords$$sep$, $endfor$" />
|
||||
$endif$
|
||||
<title>$if(title-prefix)$$title-prefix$ – $endif$$pagetitle$</title>
|
||||
<style>
|
||||
code{white-space: pre-wrap;}
|
||||
span.smallcaps{font-variant: small-caps;}
|
||||
span.underline{text-decoration: underline;}
|
||||
div.column{display: inline-block; vertical-align: top; width: 50%;}
|
||||
$if(quotes)$
|
||||
q { quotes: "“" "”" "‘" "’"; }
|
||||
$endif$
|
||||
</style>
|
||||
$if(highlighting-css)$
|
||||
<style>
|
||||
$highlighting-css$
|
||||
</style>
|
||||
$endif$
|
||||
$if(math)$
|
||||
$math$
|
||||
$endif$
|
||||
<!--[if lt IE 9]>
|
||||
<script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv-printshiv.min.js"></script>
|
||||
<![endif]-->
|
||||
$for(header-includes)$
|
||||
$header-includes$
|
||||
$endfor$
|
||||
$for(css)$
|
||||
<link rel="stylesheet" href="$css$" />
|
||||
$endfor$
|
||||
</head>
|
||||
<body>
|
||||
$for(include-before)$
|
||||
$include-before$
|
||||
$endfor$
|
||||
$if(title)$
|
||||
<header id="title-block-header">
|
||||
<h1 class="title">$title$</h1>
|
||||
$if(subtitle)$
|
||||
<p class="subtitle">$subtitle$</p>
|
||||
$endif$
|
||||
$for(author)$
|
||||
<p class="author">$author$</p>
|
||||
$endfor$
|
||||
$if(date)$
|
||||
<p class="date">$date$</p>
|
||||
$endif$
|
||||
</header>
|
||||
$endif$
|
||||
$if(toc)$
|
||||
<nav id="$idprefix$TOC" role="doc-toc">
|
||||
$table-of-contents$
|
||||
</nav>
|
||||
$endif$
|
||||
$body$
|
||||
$for(include-after)$
|
||||
$include-after$
|
||||
$endfor$
|
||||
</body>
|
||||
</html>
|
1064
docs/index.html
Normal file
1064
docs/index.html
Normal file
File diff suppressed because it is too large
Load Diff
332
docs/style.css
Normal file
332
docs/style.css
Normal file
@ -0,0 +1,332 @@
|
||||
html {
|
||||
font-size: 100%;
|
||||
overflow-y: scroll;
|
||||
-webkit-text-size-adjust: 100%;
|
||||
-ms-text-size-adjust: 100%;
|
||||
}
|
||||
|
||||
body {
|
||||
color: #444;
|
||||
font-family: Helvetica, Arial, sans-serif;
|
||||
font-size: 20px;
|
||||
line-height: 2;
|
||||
padding: 1em;
|
||||
margin: auto;
|
||||
max-width: 887px;
|
||||
background: #fefefe;
|
||||
}
|
||||
|
||||
a {
|
||||
color: #059;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
a:visited {
|
||||
color: #048;
|
||||
}
|
||||
|
||||
a:hover {
|
||||
color: #06a;
|
||||
}
|
||||
|
||||
a:active {
|
||||
color: #06a;
|
||||
}
|
||||
|
||||
a:focus {
|
||||
outline: thin dotted;
|
||||
}
|
||||
|
||||
*::-moz-selection {
|
||||
background: rgba(0, 200, 255, 0.3);
|
||||
color: #111;
|
||||
}
|
||||
|
||||
*::selection {
|
||||
background: rgba(0, 200, 255, 0.3);
|
||||
color: #111;
|
||||
}
|
||||
|
||||
a::-moz-selection {
|
||||
background: rgba(0, 200, 255, 0.3);
|
||||
color: #048;
|
||||
}
|
||||
|
||||
a::selection {
|
||||
background: rgba(0, 200, 255, 0.3);
|
||||
color: #048;
|
||||
}
|
||||
|
||||
a > span.emoji {
|
||||
font-size: 30px;
|
||||
margin-left: 5px;
|
||||
}
|
||||
|
||||
p {
|
||||
margin: 1em 0;
|
||||
}
|
||||
|
||||
img {
|
||||
max-width: 100%;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
color: #111;
|
||||
line-height: 125%;
|
||||
margin-top: 1em;
|
||||
font-weight: lighter;
|
||||
font-family: 'Roboto Condensed', Helvetica, Arial, sans-serif;
|
||||
}
|
||||
|
||||
h4, h5, h6 {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
h1 {
|
||||
font-size: 2.5em;
|
||||
}
|
||||
|
||||
h2 {
|
||||
font-size: 2em;
|
||||
}
|
||||
|
||||
h3 {
|
||||
font-size: 1.5em;
|
||||
}
|
||||
|
||||
h4 {
|
||||
font-size: 1.2em;
|
||||
}
|
||||
|
||||
h5 {
|
||||
font-size: 1em;
|
||||
}
|
||||
|
||||
h6 {
|
||||
font-size: 0.9em;
|
||||
}
|
||||
|
||||
blockquote {
|
||||
color: #666666;
|
||||
margin: 0;
|
||||
padding-left: 3em;
|
||||
border-left: 0.5em #EEE solid;
|
||||
}
|
||||
|
||||
hr {
|
||||
display: block;
|
||||
height: 2px;
|
||||
border: 0;
|
||||
border-top: 1px solid #aaa;
|
||||
border-bottom: 1px solid #eee;
|
||||
margin: 1em 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
pre, code, kbd, samp {
|
||||
font-family: monospace;
|
||||
font-size: 14px;
|
||||
}
|
||||
|
||||
pre {
|
||||
white-space: pre;
|
||||
white-space: pre-wrap;
|
||||
word-wrap: break-word;
|
||||
padding: 15px;
|
||||
}
|
||||
|
||||
b, strong {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
p > code {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
dfn {
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
ins {
|
||||
background: #ff9;
|
||||
color: #000;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
mark {
|
||||
background: #ff0;
|
||||
color: #000;
|
||||
font-style: italic;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
sub, sup {
|
||||
font-size: 75%;
|
||||
line-height: 0;
|
||||
position: relative;
|
||||
vertical-align: baseline;
|
||||
}
|
||||
|
||||
sup {
|
||||
top: -0.5em;
|
||||
}
|
||||
|
||||
sub {
|
||||
bottom: -0.25em;
|
||||
}
|
||||
|
||||
ul, ol {
|
||||
margin: 1em 0;
|
||||
padding: 0 0 0 2em;
|
||||
}
|
||||
|
||||
li p:last-child {
|
||||
margin-bottom: 0;
|
||||
}
|
||||
|
||||
ul ul, ol ol {
|
||||
margin: .3em 0;
|
||||
}
|
||||
|
||||
dl {
|
||||
margin-bottom: 1em;
|
||||
}
|
||||
|
||||
dt {
|
||||
font-weight: bold;
|
||||
margin-bottom: .8em;
|
||||
}
|
||||
|
||||
dd {
|
||||
margin: 0 0 .8em 2em;
|
||||
}
|
||||
|
||||
dd:last-child {
|
||||
margin-bottom: 0;
|
||||
}
|
||||
|
||||
img {
|
||||
border: 0;
|
||||
-ms-interpolation-mode: bicubic;
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
figure {
|
||||
display: block;
|
||||
text-align: center;
|
||||
margin: 1em 0;
|
||||
}
|
||||
|
||||
figure img {
|
||||
border: none;
|
||||
margin: 0 auto;
|
||||
}
|
||||
|
||||
figcaption {
|
||||
font-size: 0.8em;
|
||||
font-style: italic;
|
||||
margin: 0 0 .8em;
|
||||
}
|
||||
|
||||
table {
|
||||
margin-bottom: 2em;
|
||||
border-bottom: 1px solid #ddd;
|
||||
border-right: 1px solid #ddd;
|
||||
border-spacing: 0;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
|
||||
table th {
|
||||
padding: .2em 1em;
|
||||
background-color: #eee;
|
||||
border-top: 1px solid #ddd;
|
||||
border-left: 1px solid #ddd;
|
||||
}
|
||||
|
||||
table td {
|
||||
padding: .2em 1em;
|
||||
border-top: 1px solid #ddd;
|
||||
border-left: 1px solid #ddd;
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
kbd {
|
||||
border: 1px solid #999;
|
||||
padding: 5px;
|
||||
border-radius: 2px;
|
||||
background-color: #555;
|
||||
color: #eee;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
.author {
|
||||
font-size: 1.2em;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
@media print {
|
||||
* {
|
||||
background: transparent !important;
|
||||
color: black !important;
|
||||
filter: none !important;
|
||||
-ms-filter: none !important;
|
||||
}
|
||||
|
||||
body {
|
||||
font-size: 12pt;
|
||||
max-width: 100%;
|
||||
}
|
||||
|
||||
a, a:visited {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
hr {
|
||||
height: 1px;
|
||||
border: 0;
|
||||
border-bottom: 1px solid black;
|
||||
}
|
||||
|
||||
a[href]:after {
|
||||
content: " (" attr(href) ")";
|
||||
}
|
||||
|
||||
abbr[title]:after {
|
||||
content: " (" attr(title) ")";
|
||||
}
|
||||
|
||||
.ir a:after, a[href^="javascript:"]:after, a[href^="#"]:after {
|
||||
content: "";
|
||||
}
|
||||
|
||||
pre, blockquote {
|
||||
border: 1px solid #999;
|
||||
padding-right: 1em;
|
||||
page-break-inside: avoid;
|
||||
}
|
||||
|
||||
tr, img {
|
||||
page-break-inside: avoid;
|
||||
}
|
||||
|
||||
img {
|
||||
max-width: 100% !important;
|
||||
}
|
||||
|
||||
@page :left {
|
||||
margin: 15mm 20mm 15mm 10mm;
|
||||
}
|
||||
|
||||
@page :right {
|
||||
margin: 15mm 10mm 15mm 20mm;
|
||||
}
|
||||
|
||||
p, h2, h3 {
|
||||
orphans: 3;
|
||||
widows: 3;
|
||||
}
|
||||
|
||||
h2, h3 {
|
||||
page-break-after: avoid;
|
||||
}
|
||||
}
|
27
parsing-with-haskell-parser-combinators.cabal
Normal file
27
parsing-with-haskell-parser-combinators.cabal
Normal file
@ -0,0 +1,27 @@
|
||||
name: parsing-with-haskell-parser-combinators
|
||||
version: 0.0.0.0
|
||||
homepage: https://github.com/lettier/parsing-with-haskell-parser-combinators
|
||||
author: David Lettier
|
||||
copyright: 2019 David Lettier
|
||||
license: BSD3
|
||||
build-type: Simple
|
||||
cabal-version: >= 1.10
|
||||
extra-source-files: README.md
|
||||
|
||||
source-repository head
|
||||
type: git
|
||||
location: https://github.com/lettier/parsing-with-haskell-parser-combinators
|
||||
|
||||
executable version-number-parser
|
||||
main-is: src/version-number-parser.hs
|
||||
build-depends:
|
||||
base >=4.7 && <5
|
||||
ghc-options: -threaded -rtsopts -with-rtsopts=-N
|
||||
default-language: Haskell2010
|
||||
|
||||
executable srt-file-parser
|
||||
main-is: src/srt-file-parser.hs
|
||||
build-depends:
|
||||
base >=4.7 && <5
|
||||
ghc-options: -threaded -rtsopts -with-rtsopts=-N
|
||||
default-language: Haskell2010
|
476
src/srt-file-parser.hs
Normal file
476
src/srt-file-parser.hs
Normal file
@ -0,0 +1,476 @@
|
||||
{-
|
||||
SRT File Parser
|
||||
(C) 2019 David Lettier
|
||||
lettier.com
|
||||
-}
|
||||
|
||||
{-# LANGUAGE
|
||||
NamedFieldPuns
|
||||
#-}
|
||||
|
||||
import Control.Applicative ((<|>))
|
||||
import Control.Monad
|
||||
import Text.ParserCombinators.ReadP
|
||||
import Data.Char
|
||||
import Data.Maybe
|
||||
|
||||
type TagAttribute = (String, String)
|
||||
|
||||
data Tag =
|
||||
Tag
|
||||
{ name :: String
|
||||
, attributes :: [TagAttribute]
|
||||
}
|
||||
deriving (Show, Read)
|
||||
|
||||
data TaggedText =
|
||||
TaggedText
|
||||
{ text :: String
|
||||
, tags :: [Tag]
|
||||
}
|
||||
deriving (Show, Read)
|
||||
|
||||
data Timestamp =
|
||||
Timestamp
|
||||
{ hours :: Int
|
||||
, minutes :: Int
|
||||
, seconds :: Int
|
||||
, milliseconds :: Int
|
||||
}
|
||||
deriving (Show, Read)
|
||||
|
||||
data SrtSubtitleCoordinates =
|
||||
SrtSubtitleCoordinates
|
||||
{ x1 :: Int
|
||||
, x2 :: Int
|
||||
, y1 :: Int
|
||||
, y2 :: Int
|
||||
}
|
||||
deriving (Show, Read)
|
||||
|
||||
data SrtSubtitle =
|
||||
SrtSubtitle
|
||||
{ index :: Int
|
||||
, start :: Timestamp
|
||||
, end :: Timestamp
|
||||
, coordinates :: Maybe SrtSubtitleCoordinates
|
||||
, taggedText :: [TaggedText]
|
||||
}
|
||||
deriving (Show, Read)
|
||||
|
||||
main
|
||||
:: IO ()
|
||||
main
|
||||
= do
|
||||
putStrLn "What is the SRT file path?"
|
||||
filePath <- getLine
|
||||
text <- readFile filePath
|
||||
let result =
|
||||
case readP_to_S parseSrt text of
|
||||
[] -> []
|
||||
r@(_:_) -> fst $ last r
|
||||
putStrLn ""
|
||||
print result
|
||||
|
||||
parseSrt
|
||||
:: ReadP [SrtSubtitle]
|
||||
parseSrt
|
||||
=
|
||||
manyTill parseBlock (skipSpaces >> eof)
|
||||
|
||||
parseBlock
|
||||
:: ReadP SrtSubtitle
|
||||
parseBlock
|
||||
= do
|
||||
i <- parseIndex
|
||||
(s, e) <- parseTimestamps
|
||||
c <- parseCoordinates
|
||||
t <- parseTextLines
|
||||
return
|
||||
SrtSubtitle
|
||||
{ index = i
|
||||
, start = s
|
||||
, end = e
|
||||
, coordinates = c
|
||||
, taggedText = t
|
||||
}
|
||||
|
||||
parseBlock'
|
||||
:: ReadP SrtSubtitle
|
||||
parseBlock'
|
||||
=
|
||||
SrtSubtitle
|
||||
<$> parseIndex
|
||||
<*> parseStartTimestamp
|
||||
<*> parseEndTimestamp
|
||||
<*> parseCoordinates
|
||||
<*> parseTextLines
|
||||
|
||||
parseIndex
|
||||
:: ReadP Int
|
||||
parseIndex
|
||||
=
|
||||
skipSpaces
|
||||
>> readInt <$> parseNumber
|
||||
|
||||
parseTimestamps
|
||||
:: ReadP (Timestamp, Timestamp)
|
||||
parseTimestamps
|
||||
= do
|
||||
_ <- char '\n'
|
||||
s <- parseTimestamp
|
||||
_ <- skipSpaces
|
||||
_ <- string "-->"
|
||||
_ <- skipSpaces
|
||||
e <- parseTimestamp
|
||||
return (s, e)
|
||||
|
||||
parseStartTimestamp
|
||||
:: ReadP Timestamp
|
||||
parseStartTimestamp
|
||||
=
|
||||
char '\n'
|
||||
>> parseTimestamp
|
||||
|
||||
parseEndTimestamp
|
||||
:: ReadP Timestamp
|
||||
parseEndTimestamp
|
||||
=
|
||||
skipSpaces
|
||||
>> string "-->"
|
||||
>> skipSpaces
|
||||
>> parseTimestamp
|
||||
|
||||
parseTimestamp
|
||||
:: ReadP Timestamp
|
||||
parseTimestamp
|
||||
= do
|
||||
h <- parseNumber
|
||||
_ <- char ':'
|
||||
m <- parseNumber
|
||||
_ <- char ':'
|
||||
s <- parseNumber
|
||||
_ <- char ',' <|> char '.'
|
||||
m' <- parseNumber
|
||||
return
|
||||
Timestamp
|
||||
{ hours = readInt h
|
||||
, minutes = readInt m
|
||||
, seconds = readInt s
|
||||
, milliseconds = readInt m'
|
||||
}
|
||||
|
||||
parseCoordinates
|
||||
:: ReadP (Maybe SrtSubtitleCoordinates)
|
||||
parseCoordinates
|
||||
=
|
||||
option Nothing $ do
|
||||
_ <- skipSpaces1
|
||||
x1 <- parseCoordinate 'x' 1
|
||||
_ <- skipSpaces1
|
||||
x2 <- parseCoordinate 'x' 2
|
||||
_ <- skipSpaces1
|
||||
y1 <- parseCoordinate 'y' 1
|
||||
_ <- skipSpaces1
|
||||
y2 <- parseCoordinate 'y' 2
|
||||
return
|
||||
$ Just
|
||||
SrtSubtitleCoordinates
|
||||
{ x1 = readInt x1
|
||||
, x2 = readInt x2
|
||||
, y1 = readInt y1
|
||||
, y2 = readInt y2
|
||||
}
|
||||
|
||||
parseCoordinate
|
||||
:: Char
|
||||
-> Int
|
||||
-> ReadP String
|
||||
parseCoordinate
|
||||
c
|
||||
n
|
||||
= do
|
||||
_ <- char (Data.Char.toUpper c) <|> char (Data.Char.toLower c)
|
||||
_ <- string $ show n ++ ":"
|
||||
parseNumber
|
||||
|
||||
parseTextLines
|
||||
:: ReadP [TaggedText]
|
||||
parseTextLines
|
||||
=
|
||||
char '\n'
|
||||
>> (getTaggedText <$> manyTill parseAny parseEndOfTextLines)
|
||||
|
||||
getTaggedText
|
||||
:: String
|
||||
-> [TaggedText]
|
||||
getTaggedText
|
||||
s
|
||||
=
|
||||
fst
|
||||
$ foldl
|
||||
folder
|
||||
([], [])
|
||||
parsed
|
||||
where
|
||||
parsed
|
||||
:: [String]
|
||||
parsed
|
||||
=
|
||||
case readP_to_S (parseTaggedText []) s of
|
||||
[] -> [s]
|
||||
r@(_:_) -> (fst . last) r
|
||||
folder
|
||||
:: ([TaggedText], [Tag])
|
||||
-> String
|
||||
-> ([TaggedText], [Tag])
|
||||
folder
|
||||
(tt, t)
|
||||
x
|
||||
| isTag x = (tt, updateTags t x)
|
||||
| otherwise = (tt ++ [TaggedText { text = x, tags = t}], t)
|
||||
|
||||
updateTags
|
||||
:: [Tag]
|
||||
-> String
|
||||
-> [Tag]
|
||||
updateTags
|
||||
tags
|
||||
x
|
||||
| isClosingTag x = remove compare' tags (makeTag x)
|
||||
| isOpeningTag x = add compare' tags (makeTag x)
|
||||
| otherwise = tags
|
||||
where
|
||||
compare'
|
||||
:: Tag
|
||||
-> Tag
|
||||
-> Bool
|
||||
compare'
|
||||
a
|
||||
b
|
||||
=
|
||||
name a /= name b
|
||||
|
||||
makeTag
|
||||
:: String
|
||||
-> Tag
|
||||
makeTag
|
||||
s
|
||||
=
|
||||
Tag
|
||||
{ name = getTagName s
|
||||
, attributes = getTagAttributes s
|
||||
}
|
||||
|
||||
parseEndOfTextLines
|
||||
:: ReadP ()
|
||||
parseEndOfTextLines
|
||||
=
|
||||
void (string "\n\n") <|> eof
|
||||
|
||||
parseTaggedText
|
||||
:: [String]
|
||||
-> ReadP [String]
|
||||
parseTaggedText
|
||||
strings
|
||||
= do
|
||||
s <- look
|
||||
case s of
|
||||
"" -> return strings
|
||||
_ -> do
|
||||
r <- munch1 (/= '<') <++ parseClosingTag <++ parseOpeningTag
|
||||
parseTaggedText $ strings ++ [r]
|
||||
|
||||
parseOpeningTag
|
||||
:: ReadP String
|
||||
parseOpeningTag
|
||||
= do
|
||||
_ <- char '<'
|
||||
t <- munch1 (\ c -> c /= '/' && c /= '>')
|
||||
_ <- char '>'
|
||||
return $ "<" ++ t ++ ">"
|
||||
|
||||
parseClosingTag
|
||||
:: ReadP String
|
||||
parseClosingTag
|
||||
= do
|
||||
_ <- char '<'
|
||||
_ <- char '/'
|
||||
t <- munch1 (/= '>')
|
||||
_ <- char '>'
|
||||
return $ "</" ++ t ++ ">"
|
||||
|
||||
getTagAttributes
|
||||
:: String
|
||||
-> [TagAttribute]
|
||||
getTagAttributes
|
||||
s
|
||||
=
|
||||
if isOpeningTag s
|
||||
then
|
||||
case readP_to_S (parseTagAttributes []) s of
|
||||
[] -> []
|
||||
(x:_) -> fst x
|
||||
else
|
||||
[]
|
||||
|
||||
getTagName
|
||||
:: String
|
||||
-> String
|
||||
getTagName
|
||||
s
|
||||
=
|
||||
case readP_to_S parseTagName s of
|
||||
[] -> ""
|
||||
(x:_) -> toLower' $ fst x
|
||||
|
||||
parseTagName
|
||||
:: ReadP String
|
||||
parseTagName
|
||||
= do
|
||||
_ <- char '<'
|
||||
_ <- munch (== '/')
|
||||
_ <- skipSpaces
|
||||
n <- munch1 (\ c -> c /= ' ' && c /= '>')
|
||||
_ <- munch (/= '>')
|
||||
_ <- char '>'
|
||||
return n
|
||||
|
||||
parseTagAttributes
|
||||
:: [TagAttribute]
|
||||
-> ReadP [TagAttribute]
|
||||
parseTagAttributes
|
||||
tagAttributes
|
||||
= do
|
||||
s <- look
|
||||
case s of
|
||||
"" -> return tagAttributes
|
||||
_ -> do
|
||||
let h = head s
|
||||
case h of
|
||||
'>' -> return tagAttributes
|
||||
'<' -> trimTagname >> parseTagAttributes'
|
||||
_ -> parseTagAttributes'
|
||||
where
|
||||
parseTagAttributes'
|
||||
:: ReadP [TagAttribute]
|
||||
parseTagAttributes'
|
||||
= do
|
||||
tagAttribute <- parseTagAttribute
|
||||
parseTagAttributes
|
||||
( add
|
||||
(\ a b -> fst a /= fst b)
|
||||
tagAttributes
|
||||
tagAttribute
|
||||
)
|
||||
|
||||
trimTagname
|
||||
:: ReadP ()
|
||||
trimTagname
|
||||
=
|
||||
char '<'
|
||||
>> skipSpaces
|
||||
>> munch1 (\ c -> c /= ' ' && c /= '>')
|
||||
>> return ()
|
||||
|
||||
parseTagAttribute
|
||||
:: ReadP TagAttribute
|
||||
parseTagAttribute
|
||||
= do
|
||||
_ <- skipSpaces
|
||||
k <- munch1 (/= '=')
|
||||
_ <- string "=\""
|
||||
v <- munch1 (/= '\"')
|
||||
_ <- char '\"'
|
||||
_ <- skipSpaces
|
||||
return (toLower' k, v)
|
||||
|
||||
parseAny
|
||||
:: ReadP Char
|
||||
parseAny
|
||||
=
|
||||
satisfy (const True)
|
||||
|
||||
parseNumber
|
||||
:: ReadP String
|
||||
parseNumber
|
||||
=
|
||||
munch1 isNumber
|
||||
|
||||
skipSpaces1
|
||||
:: ReadP ()
|
||||
skipSpaces1
|
||||
=
|
||||
void $ skipMany1 (char ' ')
|
||||
|
||||
isTag
|
||||
:: String
|
||||
-> Bool
|
||||
isTag
|
||||
s
|
||||
=
|
||||
isOpeningTag s || isClosingTag s
|
||||
|
||||
isOpeningTag
|
||||
:: String
|
||||
-> Bool
|
||||
isOpeningTag
|
||||
s
|
||||
=
|
||||
isPresent $ readP_to_S parseOpeningTag s
|
||||
|
||||
isClosingTag
|
||||
:: String
|
||||
-> Bool
|
||||
isClosingTag
|
||||
s
|
||||
=
|
||||
isPresent $ readP_to_S parseClosingTag s
|
||||
|
||||
readInt
|
||||
:: String
|
||||
-> Int
|
||||
readInt
|
||||
=
|
||||
read
|
||||
|
||||
toLower'
|
||||
:: String
|
||||
-> String
|
||||
toLower'
|
||||
=
|
||||
map toLower
|
||||
|
||||
remove
|
||||
:: (a -> a -> Bool)
|
||||
-> [a]
|
||||
-> a
|
||||
-> [a]
|
||||
remove
|
||||
f
|
||||
xs
|
||||
x
|
||||
=
|
||||
filter
|
||||
(f x)
|
||||
xs
|
||||
|
||||
add
|
||||
:: (a -> a -> Bool)
|
||||
-> [a]
|
||||
-> a
|
||||
-> [a]
|
||||
add
|
||||
f
|
||||
xs
|
||||
x
|
||||
| isPresent xs = remove f xs x ++ [x]
|
||||
| otherwise = [x]
|
||||
|
||||
isPresent
|
||||
:: Foldable t
|
||||
=> t a
|
||||
-> Bool
|
||||
isPresent
|
||||
=
|
||||
not . null
|
71
src/version-number-parser.hs
Normal file
71
src/version-number-parser.hs
Normal file
@ -0,0 +1,71 @@
|
||||
{-
|
||||
Version Number Parser
|
||||
(C) 2019 David Lettier
|
||||
lettier.com
|
||||
-}
|
||||
|
||||
import Control.Monad
|
||||
import Text.ParserCombinators.ReadP
|
||||
import Data.Char
|
||||
import Data.Maybe
|
||||
|
||||
main
|
||||
:: IO ()
|
||||
main
|
||||
= do
|
||||
putStrLn "What is the version output file path?"
|
||||
filePath <- getLine
|
||||
text <- readFile filePath
|
||||
let result =
|
||||
case readP_to_S (parseVersionNumber []) text of
|
||||
[] -> []
|
||||
r@(_:_) -> map readInt $ fst $ last r
|
||||
putStrLn ""
|
||||
print result
|
||||
|
||||
parseVersionNumber
|
||||
:: [String]
|
||||
-> ReadP [String]
|
||||
parseVersionNumber
|
||||
nums
|
||||
= do
|
||||
_ <- parseNotNumber
|
||||
num <- parseNumber
|
||||
let nums' = nums ++ [num]
|
||||
parseSeparator nums' parseVersionNumber
|
||||
|
||||
parseSeparator
|
||||
:: [String]
|
||||
-> ([String] -> ReadP [String])
|
||||
-> ReadP [String]
|
||||
parseSeparator
|
||||
nums
|
||||
f
|
||||
= do
|
||||
next <- look
|
||||
case next of
|
||||
"" -> return nums
|
||||
(c:_) ->
|
||||
case c of
|
||||
'.' -> f nums
|
||||
'-' -> if length nums == 1 then f [] else f nums
|
||||
_ -> if length nums == 1 then f [] else return nums
|
||||
|
||||
parseNotNumber
|
||||
:: ReadP String
|
||||
parseNotNumber
|
||||
=
|
||||
munch (not . isNumber)
|
||||
|
||||
parseNumber
|
||||
:: ReadP String
|
||||
parseNumber
|
||||
=
|
||||
munch1 isNumber
|
||||
|
||||
readInt
|
||||
:: String
|
||||
-> Int
|
||||
readInt
|
||||
=
|
||||
read
|
3
stack.yaml
Normal file
3
stack.yaml
Normal file
@ -0,0 +1,3 @@
|
||||
resolver: lts-13.27
|
||||
packages:
|
||||
- .
|
12
stack.yaml.lock
Normal file
12
stack.yaml.lock
Normal file
@ -0,0 +1,12 @@
|
||||
# This file was autogenerated by Stack.
|
||||
# You should not edit this file by hand.
|
||||
# For more information, please see the documentation at:
|
||||
# https://docs.haskellstack.org/en/stable/lock_files
|
||||
|
||||
packages: []
|
||||
snapshots:
|
||||
- completed:
|
||||
size: 500539
|
||||
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/13/27.yaml
|
||||
sha256: 690db832392afe55733b4c7023fd29b1b1c660ee42f1fb505b86b07394ca994e
|
||||
original: lts-13.27
|
22
test-input/gifcurry-version-output.txt
Normal file
22
test-input/gifcurry-version-output.txt
Normal file
@ -0,0 +1,22 @@
|
||||
|
||||
▄▄▄▄▄▄▄▄
|
||||
▄▄████ ▀▀███▄
|
||||
████▀ ▄ ▀███ ▄ ▐██▌ ▄███▄
|
||||
▄ ▐███ ████ ▀███ ▄███▀▀██ ███
|
||||
▐█▌ ██ ▐███ ████ ███ ▐██ █████▌ ▄█████ ▐██▌ ██▌ ██▄██▌ ██▄██▌ ██▌ ███
|
||||
███ ▐▌ ███ ▐███▌ ███ ████▌ ▐██ ██▌ ███ ▐██▌ ██▌ ███▀ ███▀ ▐██ ███
|
||||
████ ███▀ ▐█ ███▌ ███ ██▌ ▐██ ██▌ ███ ▐██▌ ██▌ ██▌ ██▌ ██▌▐██
|
||||
▐███▄ ▐██▌ ██ ██ ███▄▄▄██▌ ▐██ ██▌ ███▄▄█ ███▄███▌ ██▌ ██▌ ████▌
|
||||
▀███ ▀███ ▐███ ▀ ▀▀▀▀▀ ▀▀ ▀▀ ▀▀▀ ▀▀▀ ▀▀ ▀▀ ███
|
||||
███▄ ▀ ████▌ ███▀
|
||||
▀███▄▄ █████▀
|
||||
▀▀▀▀▀▀▀
|
||||
|
||||
|
||||
Gifcurry 6.0.0.0
|
||||
(C) 2016 David Lettier
|
||||
https://lettier.com
|
||||
|
||||
Wanna help out Gifcurry? Star it on GitHub! ☺ Thanks for helping out—you rock!
|
||||
https://github.com/lettier/gifcurry/stargazers
|
||||
|
4
test-input/imagemagick-version-output.txt
Normal file
4
test-input/imagemagick-version-output.txt
Normal file
@ -0,0 +1,4 @@
|
||||
Version: ImageMagick 2018-10-24 6.9.10-14 Q16 x86_64 https://imagemagick.org
|
||||
Copyright: © 1999-2018 ImageMagick Studio LLC
|
||||
License: https://imagemagick.org/script/license.php
|
||||
Features: Cipher DPC HDRI Modules OpenCL OpenMP
|
27
test-input/subtitles.srt
Normal file
27
test-input/subtitles.srt
Normal file
@ -0,0 +1,27 @@
|
||||
|
||||
|
||||
|
||||
1
|
||||
01:00:12,000 --> 01:00:15,000
|
||||
This is a subtitle.
|
||||
|
||||
|
||||
|
||||
|
||||
2
|
||||
1:02:18,010 --> 01:05:10,010
|
||||
This is some subtitle
|
||||
text that spans multiple lines.
|
||||
It includes formatting
|
||||
like <b>bold</b>, <i>italic</i>,
|
||||
<u>underline</u>, < font color="#010101" color="#333" >
|
||||
font > color</font>, and << even <b><i><u>
|
||||
nested tags over multiple
|
||||
</u></i> lines</b><u>.
|
||||
|
||||
3
|
||||
03:23:11,010 --> 03:55:17.110 X1:123 X2:223 Y1:50 Y2:101
|
||||
This subtitle specifies a text box
|
||||
using X1, X2, Y1, and Y2.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user