Skip to content
Extraits de code Groupes Projets
Valider ff4f7a25 rédigé par Seipas's avatar Seipas
Parcourir les fichiers

Standalone + latin1 support + README updates

parent 082ad229
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
What is EpitASS ?
==============
EpitASS is a converter for karaoke subtitle files from Toyunda Raw format to v4+ aegisub-generated-style ASS format.
EpitASS is a converter for karaoke subtitle files from Toyunda Raw format to V4+ aegisub-generated-style ASS format.
Project repository : https://framagit.org/Seipas/EpitASS
How to build it ?
==============
- Install `ocamlc`, `ocamllex` and `ocamlyacc`
- Install `ocamlrun`, `ocamlc`, `ocamllex` and `ocamlyacc`
- Run `make`
......@@ -32,11 +33,9 @@ ocamlc -c toyunda_raw_to_v4p_ass.ml
ocamlc -i epitass.ml > epitass.mli
ocamlc -c epitass.mli
ocamlc -c epitass.ml
ocamlc -o epitass str.cma tree_toyunda_raw.cmo parser_toyunda_raw.cmo lexer_toyunda_raw.cmo tree_v4p_ass.cmo toyunda_raw_to_v4p_ass.cmo epitass.cmo
ocamlc -o epitass tree_toyunda_raw.cmo parser_toyunda_raw.cmo lexer_toyunda_raw.cmo tree_v4p_ass.cmo toyunda_raw_to_v4p_ass.cmo epitass.cmo
```
How to use it ?
==============
......@@ -63,18 +62,18 @@ Example :
What are those formats ?
==============
ASS and Toyunda are formats used to give subtitles to a video. These subtitles can be karaoke subtitles, showing which syllable is voiced when.
SSA/ASS and Toyunda are formats used to give subtitles to a video. These subtitles can be karaoke subtitles, showing which syllable is voiced when.
ASS
SSA/ASS
--------------
SSA/ASS stands for Sub Station Alpha. It's a format used by the programs SubStation Alpha and Aegisub.
SSA stands for Sub Station Alpha. It's a format used by the programs SubStation Alpha and Aegisub. It has several versions: V1, V2, V3, V4 and V4+; the latter is called Advanced SubStation Alpha (ASS).
Modern Players can usually read those files without modifications.
EpitASS use the same format as Aegisub. For more informations :
EpitASS uses V4+, the same format as Aegisub. For more information:
http://fileformats.wikia.com/wiki/SubStation_Alpha
http://docs.aegisub.org/3.2/Main_Page/
https://www.matroska.org/technical/specs/subtitles/ssa.html
Toyunda
......@@ -120,7 +119,6 @@ If you want to understand better how epitass works (with the clusters and buffer
Known bugs and missing functionalities
==============
- Extended ASCII (latin-1) caracters, such as 'é', 'à', ... are not handled yet. The lexer prints a warning.
- Positioned lines are not printed.
- Lack of command line options.
- Generated ass lines appear in the order of the pipes, not time.
......
Fichier ajouté
......@@ -20,7 +20,7 @@ PARSER=parser_toyunda_raw
LEXER=lexer_toyunda_raw
TREEV4PASS=tree_v4p_ass
TOYUNDATOASS=toyunda_raw_to_v4p_ass
LIBRAIRIES=str.cma
LIBRAIRIES=
MAIN=epitass
PROGRAM=epitass
DOSSIERTOYUNDARAW=Toyunda_Raw
......@@ -33,6 +33,7 @@ all: build clean
# Compilation
build : $(PROGRAM)
standalone: $(PROGRAM).standalone
%.mli: %.ml
ocamlc -i $^ > $@
......@@ -52,6 +53,9 @@ $(LEXER).ml: $(LEXER).mll
$(PROGRAM): $(TREETOYUNDARAW).cmo $(PARSER).cmo $(LEXER).cmo $(TREEV4PASS).cmo $(TOYUNDATOASS).cmo $(MAIN).cmo
ocamlc -o $(PROGRAM) $(LIBRAIRIES) $^
$(PROGRAM).standalone: $(TREETOYUNDARAW).cmo $(PARSER).cmo $(LEXER).cmo $(TREEV4PASS).cmo $(TOYUNDATOASS).cmo $(MAIN).cmo
ocamlc -linkall -custom -o $(PROGRAM).standalone $(LIBRAIRIES) $^
# Dépendances
$(PARSER).ml: $(TREETOYUNDARAW).cmi
$(LEXER).cmo: $(PARSER).cmi
......
......@@ -49,7 +49,6 @@ and sorted_toyunda = cluster list Imap.t (*int keys : number of pipes. ylines wi
and cluster = int (*begin_frame*) * int (*end_frame*) * nline list * yline list
let rec insert_line_in_cluster_list overlap_frame_mode startf endf l cl =
let create_cluster s e l = match l with
| Yline yl -> (s,e,[],[yl])
......@@ -189,6 +188,118 @@ let fprint_sorted_toyunda outch sorted_toyunda =
(* Convert a latin1 (ISO-8859-1) string to UTF-8.
   Toyunda files use latin1, the program prints UTF-8. Used by the second step
   when the timed text of a line is assembled.

   s : the latin1 encoded string.
   Returns a new string holding the UTF-8 encoding of s; s is not modified.

   Bytes below 128 are plain ASCII and are copied unchanged. Every byte from
   128 to 255 is the Unicode code point of the same number, which UTF-8
   encodes on two bytes. This includes bytes 128 to 159: copying them raw
   would yield an invalid UTF-8 sequence.

   Four bytes are substituted instead of being converted to their exact code
   point. This is the historical behaviour of this function and is kept
   as is:
     160 (no-break space)       -> ordinary space
     171 (left guillemet)       -> guillemet followed by a space
     173 (soft hyphen)          -> ASCII hyphen-minus
     187 (right guillemet)      -> space followed by guillemet
   NOTE(review): these substitutions look deliberate (typography), confirm
   before replacing them with a strict conversion. *)
let utf8_of_latin1 s =
  let len = String.length s in
  (* Each input byte yields at most three output bytes; 2 * len is only an
     initial capacity, the buffer grows on demand. *)
  let out = Buffer.create (2 * len) in
  (* Emit the two-byte UTF-8 form (110xxxxx 10xxxxxx) of a code point in the
     range 128..255. *)
  let add_code_point code =
    Buffer.add_char out (Char.chr (0xC0 lor (code lsr 6)));
    Buffer.add_char out (Char.chr (0x80 lor (code land 0x3F)))
  in
  (* Plain loop over valid indices: no reliance on an out-of-bounds exception
     to stop, constant stack usage, linear time. *)
  for i = 0 to len - 1 do
    let c = s.[i] in
    match c with
    | '\160' -> Buffer.add_char out ' '
    | '\171' -> add_code_point 171; Buffer.add_char out ' '
    | '\173' -> Buffer.add_char out '-'
    | '\187' -> Buffer.add_char out ' '; add_code_point 187
    | _ ->
      let code = Char.code c in
      if code < 0x80
      then Buffer.add_char out c
      else add_code_point code
  done;
  Buffer.contents out
(** Second step : black magic in clusters **)(*
1) Manage each cluster separately
......@@ -403,7 +514,7 @@ sorted_toyunda positioned_lines =
| [] -> (first_frame, expected_frame, (String.concat ""
[
already_timed;
try (String.sub b cursor_count ((String.length b) - cursor_count))
try utf8_of_latin1 (String.sub b cursor_count ((String.length b) - cursor_count))
with Invalid_argument _ -> let _ = Printf.eprintf "[Debug] b=%s cc=%d\n" b cursor_count in ""
]))
| (sf,ef,yc)::slt ->
......@@ -421,7 +532,7 @@ sorted_toyunda positioned_lines =
time b slt first_frame ef (
String.concat "" [
already_timed;
(try String.sub b cursor_count (yc - cursor_count)
(try utf8_of_latin1 (String.sub b cursor_count (yc - cursor_count))
with Invalid_argument _ -> let _ = Printf.eprintf "[Debug] b=%s cc=%d yc=%d\n" b cursor_count yc in "");
invisible_tag; "{\\k";kstring;"}"
]
......@@ -465,8 +576,6 @@ sorted_toyunda positioned_lines =
let build_ass (style_list : Tree_v4p_ass.style_values list) (event_list : Tree_v4p_ass.event_values list) playResX playResY audio_file video_file =
(
(playResX,playResY,audio_file,video_file,[],[]),
......
......@@ -5,9 +5,16 @@
* structure of the trees storing v4+ aegisub-generated-style ass files *
\***********************************************************************)
(*TODO : use http://fileformats.wikia.com/wiki/SubStation_Alpha*)
type t = script_info * v4pstyle * events * pictures * fonts
and script_info = int(*playResX*) * int(*playResY*) * string(*audio_File*) * string(*video_File*) * string list(*other_comments*) * (string(*info_name*) * string(*info_value*)) list(*other_info_list*)
and script_info =
int(*playResX*)
* int(*playResY*)
* string(*audio_File*)
* string(*video_File*)
* string list(*other_comments*)
* (string(*info_name*) * string(*info_value*)) list(*other_info_list*)
and v4pstyle = string(*other_field_name*) list(*format*) * style_values list(*styles*)
and style_values =
......
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous pour commenter