diff --git a/README.md b/README.md index a4ccc293af2508e17c536fe75658eed5f90a8731..1b3cb118eb1717d08840c046499e7fa75b5c96bd 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,13 @@ What is EpitASS ? ============== -EpitASS is a converter for karaoke subtitle files from Toyunda Raw format to v4+ aegisub-generated-style ASS format. +EpitASS is a converter for karaoke subtitle files from Toyunda Raw format to V4+ aegisub-generated-style ASS format. +Project repository : https://framagit.org/Seipas/EpitASS How to build it ? ============== -- Install `ocamlc`, `ocamllex` and `ocamlyacc` +- Install `ocamlrun`, `ocamlc`, `ocamllex` and `ocamlyacc` - Run `make` @@ -32,11 +33,9 @@ ocamlc -c toyunda_raw_to_v4p_ass.ml ocamlc -i epitass.ml > epitass.mli ocamlc -c epitass.mli ocamlc -c epitass.ml -ocamlc -o epitass str.cma tree_toyunda_raw.cmo parser_toyunda_raw.cmo lexer_toyunda_raw.cmo tree_v4p_ass.cmo toyunda_raw_to_v4p_ass.cmo epitass.cmo +ocamlc -o epitass tree_toyunda_raw.cmo parser_toyunda_raw.cmo lexer_toyunda_raw.cmo tree_v4p_ass.cmo toyunda_raw_to_v4p_ass.cmo epitass.cmo ``` - - How to use it ? ============== @@ -63,18 +62,18 @@ Example : What are those formats ? ============== -ASS and Toyunda are formats used to give subtitles to a video. These subtitles can be karaoke subtitles, showing which syllable is voiced when. +SSA/ASS and Toyunda are formats used to give subtitles to a video. These subtitles can be karaoke subtitles, showing which syllable is voiced when. -ASS +SSA/ASS -------------- -SSA/ASS stands for Sub Station Alpha. It's a format used by the programs SubStation Alpha and Aegisub. +SSA stands for Sub Station Alpha. It's a format used by the programs SubStation Alpha and Aegisub. It has several versions : V1, V2, V3, V4 and V4+, the latter is called Advanced SubStation Alpha (ASS). Modern Players can usually read those files without modifications. -EpitASS use the same format as Aegisub. 
For more informations : +EpitASS uses V4+, the same format as Aegisub. For more information : +http://fileformats.wikia.com/wiki/SubStation_Alpha http://docs.aegisub.org/3.2/Main_Page/ -https://www.matroska.org/technical/specs/subtitles/ssa.html Toyunda @@ -120,7 +119,6 @@ If you want to understand better how epitass works (with the clusters and buffer Known bugs and lacking functionnalities ============== -- Extended ASCII (latin-1) caracters, such as 'é', 'à', ... are not handled yet. The lexer prints a warning. - Positionned lines are not printed. - Lack of command line options. - Generated ass lines appear in the order of the pipes, not time. diff --git a/epitass.standalone b/epitass.standalone new file mode 100755 index 0000000000000000000000000000000000000000..1d6c8ff0777b4e49824f220ef71d40b7db19f6ac Binary files /dev/null and b/epitass.standalone differ diff --git a/makefile b/makefile index 2b026f8c758afa61b93c422e5aefeb3be47a71f6..29a21c5922039b3cd5341a884a1ce7bf1f0fec4b 100644 --- a/makefile +++ b/makefile @@ -20,7 +20,7 @@ PARSER=parser_toyunda_raw LEXER=lexer_toyunda_raw TREEV4PASS=tree_v4p_ass TOYUNDATOASS=toyunda_raw_to_v4p_ass -LIBRAIRIES=str.cma +LIBRAIRIES= MAIN=epitass PROGRAM=epitass DOSSIERTOYUNDARAW=Toyunda_Raw @@ -33,6 +33,7 @@ all: build clean # Compilation build : $(PROGRAM) +standalone: $(PROGRAM).standalone %.mli: %.ml ocamlc -i $^ > $@ @@ -52,6 +53,9 @@ $(LEXER).ml: $(LEXER).mll $(PROGRAM): $(TREETOYUNDARAW).cmo $(PARSER).cmo $(LEXER).cmo $(TREEV4PASS).cmo $(TOYUNDATOASS).cmo $(MAIN).cmo ocamlc -o $(PROGRAM) $(LIBRAIRIES) $^ +$(PROGRAM).standalone: $(TREETOYUNDARAW).cmo $(PARSER).cmo $(LEXER).cmo $(TREEV4PASS).cmo $(TOYUNDATOASS).cmo $(MAIN).cmo + ocamlc -linkall -custom -o $(PROGRAM).standalone $(LIBRAIRIES) $^ + # Dépendances $(PARSER).ml: $(TREETOYUNDARAW).cmi $(LEXER).cmo: $(PARSER).cmi diff --git a/toyunda_raw_to_v4p_ass.ml b/toyunda_raw_to_v4p_ass.ml index 
f22d1f097e3f9df930a386ebc37880624ef12136..8fea8cf81b0870a00c62e783ea4a3c766f8db5e2 100644
--- a/toyunda_raw_to_v4p_ass.ml
+++ b/toyunda_raw_to_v4p_ass.ml
@@ -49,7 +49,6 @@ and sorted_toyunda = cluster list Imap.t (*int keys : number of pipes. ylines wi
 and cluster = int (*begin_frame*) * int (*end_frame*) * nline list * yline list
 
 
-
 let rec insert_line_in_cluster_list overlap_frame_mode startf endf l cl =
   let create_cluster s e l = match l with
   | Yline yl -> (s,e,[],[yl])
@@ -189,6 +188,37 @@ let fprint_sorted_toyunda outch sorted_toyunda =
 
 
 
+(* Convert a Latin-1 (ISO-8859-1) string to UTF-8. Toyunda files are Latin-1,
+   the program prints UTF-8.
+   - Bytes below 0x80 are ASCII and are copied unchanged.
+   - Any other byte is the code point of the same value and is encoded on two
+     bytes: 0xC0 lor (b lsr 6), then 0x80 lor (b land 0x3F). This covers
+     0x80-0x9F too, so the result is always valid UTF-8.
+   - Three typographic substitutions are deliberate exceptions: the soft
+     hyphen 0xAD becomes a plain hyphen, and the guillemets 0xAB and 0xBB get
+     a space on their inner side. NOTE(review): confirm they are still wanted.
+   The bytes are computed rather than written as literals, so the conversion
+   does not depend on the encoding this source file is saved in. *)
+let utf8_of_latin1 s =
+  (* At most two output bytes per input byte, plus the guillemet spaces;
+     the buffer grows on demand if this estimate is exceeded. *)
+  let buf = Buffer.create (2 * String.length s) in
+  let add_two_bytes code =
+    Buffer.add_char buf (Char.chr (0xC0 lor (code lsr 6)));
+    Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3F)))
+  in
+  String.iter
+    (fun c ->
+      match Char.code c with
+      | code when code < 0x80 -> Buffer.add_char buf c
+      | 0xAD -> Buffer.add_char buf '-'
+      | 0xAB -> add_two_bytes 0xAB; Buffer.add_char buf ' '
+      | 0xBB -> Buffer.add_char buf ' '; add_two_bytes 0xBB
+      | code -> add_two_bytes code)
+    s;
+  Buffer.contents buf
+
+
 
 (** Second step : black magic in clusters **)(*
   1) Manage each cluster separately
@@ -403,7 +433,7 @@ sorted_toyunda positioned_lines =
       | [] ->
         (first_frame, expected_frame, (String.concat "" [
           already_timed;
-          try (String.sub b cursor_count ((String.length b) - cursor_count))
+          try utf8_of_latin1 (String.sub b cursor_count ((String.length b) - cursor_count))
           with Invalid_argument _ -> let _ = Printf.eprintf "[Debug] b=%s cc=%d\n" b cursor_count in ""
         ]))
       | (sf,ef,yc)::slt ->
@@ -421,7 +451,7 @@ sorted_toyunda positioned_lines =
         time b slt first_frame ef (
           String.concat "" [
             already_timed;
-            (try String.sub b cursor_count (yc - cursor_count)
+            (try utf8_of_latin1 (String.sub b cursor_count (yc - cursor_count))
             with Invalid_argument _ -> let _ = Printf.eprintf "[Debug] b=%s cc=%d yc=%d\n" b cursor_count yc in "");
             invisible_tag; "{\\k";kstring;"}"
           ]
@@ -465,8 +495,6 @@ sorted_toyunda positioned_lines =
 
 
 
-
-
 let build_ass (style_list : Tree_v4p_ass.style_values list) (event_list : Tree_v4p_ass.event_values list) playResX playResY audio_file video_file =
   (
     (playResX,playResY,audio_file,video_file,[],[]),
diff --git a/tree_v4p_ass.ml b/tree_v4p_ass.ml
index f2a70eda6d9013579c74ba819a6f25eba22b38a9..e0c396fe36dfb7353b61731c7dc031c11c9240e7 100644
--- a/tree_v4p_ass.ml
+++ b/tree_v4p_ass.ml
@@ -5,9 +5,16 @@
  * structure of the trees storing v4+ aegisub-generated-style ass files *
 \***********************************************************************)
 
+(*TODO : use http://fileformats.wikia.com/wiki/SubStation_Alpha*)
 type t = script_info * v4pstyle * events * pictures * fonts
 
-and script_info = int(*playResX*) * int(*playResY*) * string(*audio_File*) * string(*video_File*) * string list(*other_comments*) * (string(*info_name*) * string(*info_value*)) list(*other_info_list*)
+and script_info =
+  int(*playResX*)
+  * int(*playResY*)
+  * string(*audio_File*)
+  * string(*video_File*)
+  * string list(*other_comments*)
+  * (string(*info_name*) * string(*info_value*)) list(*other_info_list*)
 
 and v4pstyle = string(*other_field_name*) list(*format*) * style_values list(*styles*)
 and style_values =