#========================================================================
# ========== Textgrid to Transcript - v0.4 beta ========================
#========================================================================
#
# Description: This script will convert a TextGrid into a basic
# transcript in the style of conversation analysis, following the GAT
# conventions.
#
# Usage: Select a Textgrid in the "Objects" window and run the script.
#
# Author: Oliver Ehmer
# Contact: www.oliverehmer.de
#
# For a more elaborate tool serving similar purposes please check:
# www.oliverehmer.de/transformer
#
# TextGrid-to-Transcript by Oliver Ehmer is licensed under a Creative
# Commons Attribution-NonCommercial-ShareAlike 4.0 International License.
# http://creativecommons.org/licenses/by-nc-sa/4.0/
#
# Restriction: Military use is not allowed.
#
#========================================================================
#========================================================================

#===================================== fixed values
# Character appended to every speaker abbreviation in the transcript.
tierAbbreviationsEndCharacter$ = ":"
# Please be aware that setting the following variable to 1 will sometimes result in errors.
nativizeSpecialCharacters = 0

#===================================== check the current Praat version
# Turn a version string such as "5.4.06" into the number 5.406:
# protect the first dot as a comma, drop the second dot, restore the first dot.
praatVersionStr$ = praatVersion$
praatVersionStr$ = replace$ (praatVersionStr$, ".", ",", 1)
praatVersionStr$ = replace$ (praatVersionStr$, ".", "", 1)
praatVersionStr$ = replace$ (praatVersionStr$, ",", ".", 1)
praatVersionNumber = number (praatVersionStr$)
if praatVersionNumber < 5.406
    exitScript: "Please download the latest version of Praat from praat.org.", newline$, newline$, "This script requires version 5.4.06 or newer to run.", newline$, newline$, "Your version is ", praatVersion$, newline$, newline$
endif

#===================================== check if a single TextGrid is selected
#--- exactly one object may be selected, and it has to be a TextGrid
n = numberOfSelected ()
if n > 1
    exitScript: "Please select only one TextGrid."
endif
n = numberOfSelected ("TextGrid")
if n = 0
    exitScript: "Please select a TextGrid before running the script."
elsif n > 1
    exitScript: "Please select only one TextGrid."
endif
selectedTextgrid$ = selected$ ("TextGrid", 1)

#===================================== user inputs
#--- ask the user for
#    the tier numbers that are to be included
#    the length of the speaker acronyms
#    the number of the pause tier
#    the maximum line width of the transcript
form TextGrid to Transcript
    comment TIERS
    comment Type in the numbers of the tiers you want to export, separated by spaces.
    comment Use -1 if you want to export all tiers.
    text include_tier_numbers -1
    comment TIER ACRONYMS
    comment Set the length of the speaker acronyms.
    comment Use -1 if you want to use the full names. The maximum is 25.
    text tier_name_length 1
    comment PAUSES
    comment Type in the number of the tier that contains the pauses.
    comment Use -1 if there is no pause tier in your TextGrid.
    text pause_tier_number -1
    comment TRANSCRIPT WIDTH
    comment Set the width of the transcript.
    comment Use -1 if you do not want to limit the width of the transcript. The minimum is 40.
    text transcript_width 60
    comment Oliver Ehmer 2016, Creative Commons license
endform

#--- procedure to split a string at every occurrence of a separator
# This procedure is taken from http://www.ucl.ac.uk/~ucjt465/scripts/
# Results: split.length holds the number of parts,
#          split.array$[1 .. split.length] holds the parts in order.
procedure split (.str$, .sep$)
    .seplen = length (.sep$)
    .length = 0
    repeat
        .strlen = length (.str$)
        .sep = index (.str$, .sep$)
        if .sep > 0
            .part$ = left$ (.str$, .sep - 1)
            .str$ = mid$ (.str$, .sep + .seplen, .strlen)
        else
            .part$ = .str$
        endif
        .length = .length + 1
        .array$[.length] = .part$
    until .sep = 0
endproc

#--- remove leading, trailing and repeated spaces from the user settings
include_tier_numbers$ = replace_regex$ (include_tier_numbers$, " +", " ", 0)
include_tier_numbers$ = replace_regex$ (include_tier_numbers$, "^ *", "", 0)
include_tier_numbers$ = replace_regex$ (include_tier_numbers$, " *$", "", 0)
pause_tier_number$ = replace_regex$ (pause_tier_number$, " +", " ", 0)
pause_tier_number$ = replace_regex$ (pause_tier_number$, "^ *", "", 0)
pause_tier_number$ = replace_regex$ (pause_tier_number$, " *$", "", 0)
tier_name_length$ = replace_regex$ (tier_name_length$, " +", " ", 0)
tier_name_length$ = replace_regex$ (tier_name_length$, "^ *", "", 0)
tier_name_length$ = replace_regex$ (tier_name_length$, " *$", "", 0)
transcript_width$ = replace_regex$ (transcript_width$, " +", " ", 0)
transcript_width$ = replace_regex$ (transcript_width$, "^ *", "", 0)
transcript_width$ = replace_regex$ (transcript_width$, " *$", "", 0)

#===================================== process user inputs
#--- exit if the transcript width is not a number or too small (-1 means "no limit")
transcript_width = number (transcript_width$)
if transcript_width = undefined
    exitScript: "Error: The width of the transcript is not a number. Check option TRANSCRIPT WIDTH."
elsif transcript_width <> -1 and transcript_width < 40
    exitScript: "Error: The width of the transcript is to low. Minimum is 40. Check option TRANSCRIPT WIDTH."
endif

#--- exit if the tier name length is not a number or out of range (-1 means "full names")
tier_name_length = number (tier_name_length$)
if tier_name_length = undefined
    exitScript: "Error: Length of tier names is not a number. Check option TIER ACRONYMS."
elsif tier_name_length <> -1
    if tier_name_length < 1
        exitScript: "Error: Length of tier names is to short. Minimum is 1. Check option TIER ACRONYMS."
    elsif tier_name_length > 25
        exitScript: "Error: Length of tier names is to long. Maximum is 25. Check option TIER ACRONYMS."
    endif
endif

#--- exit if no tiers are selected
if include_tier_numbers$ = ""
    exitScript: "Error: No tiers selected. Check option TIERS."
endif

#--- make sure the pause tier exists in the selected TextGrid
# The name stays empty when the nocheck'ed query fails.
if pause_tier_number$ <> "-1"
    pause_tier_name$ = ""
    pause_tier_name$ = nocheck Get tier name: 'pause_tier_number$'
    if pause_tier_name$ = ""
        exitScript: "Error: The TextGrid you selected does not contain a pause tier with the number: '" + pause_tier_number$ +"' Check option PAUSES."
    endif
endif
pause_tier_number = number (pause_tier_number$)

#===================================== make new TextGrid based on selected tiers
if include_tier_numbers$ = "-1"
    #--- include all tiers: tier numbers (and the pause tier number) stay the same
    selectObject: "TextGrid " + selectedTextgrid$
    Copy: "newTextGrid"
else
    #--- split the string the user has set and loop through all requested tiers
    @split: include_tier_numbers$, " "
    number_of_tiers = split.length
    # Position of the pause tier inside the NEW TextGrid; stays -1 when the
    # pause tier is not among the exported tiers, so that no unrelated tier
    # is treated as the pause tier later on.
    pausePositionInExport = -1
    for i to number_of_tiers
        # select our original TextGrid
        nocheck selectObject: "TextGrid " + selectedTextgrid$
        # get the requested tier number
        tierNr$ = split.array$[i]
        tierNr = number (tierNr$)
        # check if this tier is the pause tier (numeric comparison, so "02" matches "2")
        if pause_tier_number <> -1 and tierNr = pause_tier_number
            pausePositionInExport = i
        endif
        # extract the tier; the variable is undefined when the extraction failed
        tier_'i' = nocheck Extract one tier: 'tierNr'
        if tier_'i' = undefined
            # remove the tiers that were extracted so far
            for j from 1 to i - 1
                nocheck selectObject: tier_'j'
                nocheck Remove
            endfor
            # leave the user's original selection in place
            nocheck selectObject: "TextGrid " + selectedTextgrid$
            exitScript: "Error: The Textgrid you selected does not contain a tier with the number: '" + tierNr$ + "' Check option TIERS."
        endif
    endfor
    pause_tier_number = pausePositionInExport

    #--- merge the individual tiers into the new TextGrid
    selectObject: 'tier_1'
    if number_of_tiers > 1
        for i from 2 to number_of_tiers
            plusObject: tier_'i'
        endfor
    endif
    newTextGrid = Merge
    Rename: "newTextGrid"
endif

#===================================== calculate the width of the speaker abbreviations
if tier_name_length = -1
    #--- full names: use the length of the longest tier name
    selectObject: "TextGrid newTextGrid"
    number_of_tiers = Get number of tiers
    max_len_tier_name = -1
    if number_of_tiers = 0
        exitScript: "Error: No tiers identified. Check option TIERS."
    endif
    for i to number_of_tiers
        # the name of the pause tier is not counted
        if pause_tier_number <> i
            tier_name$ = Get tier name: 'i'
            if length (tier_name$) > max_len_tier_name
                max_len_tier_name = length (tier_name$)
            endif
        endif
    endfor
    tierAbbreviationsLen = max_len_tier_name
else
    #--- the user has set the length of the tier names
    tierAbbreviationsLen = tier_name_length
endif

#===================================== create replacements for empty line beginnings
#--- tiers: blanks as wide as an abbreviation plus its end character
tierReplacement$ = ""
for i to length (tierAbbreviationsEndCharacter$) + tierAbbreviationsLen
    tierReplacement$ = tierReplacement$ + " "
endfor
#--- line numbers
lineNumberReplacement$ = " "

#===================================== calculate width of text section
splitLength = transcript_width - length (tierReplacement$) - 2 - 2

#===================================== rename pause
#--- if a pause tier is specified, set the name of that tier to spaces
selectObject: "TextGrid newTextGrid"
if pause_tier_number > -1
    Set tier name: 'pause_tier_number', tierReplacement$
endif

#===================================== convert TextGrid to table
#--- special characters
if nativizeSpecialCharacters = 1
    Nativize
endif
#--- convert TextGrid to table
Down to Table: "no", 6, "yes", "no"
Rename: "myTextgridToGatTable"
#--- sort by time
Sort rows: "tmin tier"
#--- append columns
Append column: "line"
Append column: "speaker"
Append column: "intervalText"
#===================================== generate output
# Walks through the rows of the selected table (one row per interval, sorted
# by time), builds one transcript line per row (several when the text has to
# be wrapped) and finally writes the whole transcript to the Info window.
lastTier$ = ""
tierNameOutput$ = ""
output$ = selectedTextgrid$
output$ = output$ + newline$
output$ = output$ + newline$
nrRows = Get number of rows
lastprogress = 0
for myRowNr to nrRows
    #- output progress in steps of 10 percent
    newprogress = round ((myRowNr / nrRows) * 10)
    if newprogress > lastprogress
        writeInfoLine: "Progress " + string$ (newprogress * 10) + "%"
        lastprogress = newprogress
    endif

    #- line numbers (padded to at least two digits)
    if myRowNr < 10
        lineNumber$ = "0" + string$ (myRowNr)
    else
        lineNumber$ = string$ (myRowNr)
    endif
    Set string value: myRowNr, "line", lineNumber$
    #- blanks of exactly the same width as the line number, so that
    #  continuation lines line up with the first line of the row
    lineNumberReplacement$ = replace_regex$ (lineNumber$, ".", " ", 0)

    #- speakers
    currentTier$ = Get value: myRowNr, "tier"
    if currentTier$ = lastTier$
        # replace repeated speaker names by blanks
        tierNameOutput$ = tierReplacement$
    elsif currentTier$ = tierReplacement$
        # the pause tier was renamed to the replacement string: keep it as it is
        tierNameOutput$ = currentTier$
    else
        # a regular speaker: cut names that are too long ...
        tierNameOutput$ = currentTier$
        if length (tierNameOutput$) > tierAbbreviationsLen
            tierNameOutput$ = left$ (tierNameOutput$, tierAbbreviationsLen)
        endif
        missing_spaces = tierAbbreviationsLen - length (tierNameOutput$)
        # ... add the end character ...
        tierNameOutput$ = tierNameOutput$ + tierAbbreviationsEndCharacter$
        # ... and pad short names with spaces at the end
        if missing_spaces > 0
            for space to missing_spaces
                tierNameOutput$ = tierNameOutput$ + " "
            endfor
        endif
    endif
    Set string value: myRowNr, "speaker", tierNameOutput$
    # remember the current tier as the new last tier
    lastTier$ = currentTier$

    #- text
    intervalText$ = Get value: myRowNr, "text"
    Set string value: myRowNr, "intervalText", intervalText$

    #- output everything together
    if transcript_width = -1
        # the width is not limited: one line per interval
        output$ = output$ + lineNumber$ + " " + tierNameOutput$ + " " + intervalText$ + newline$
    else
        if splitLength < 1 or length (intervalText$) <= splitLength
            # the text fits, or there is no room for a text column at all
            # (wrapping could never make progress then): keep it in one piece
            textPart$[1] = intervalText$
            splits = 1
        else
            # the text has to be wrapped
            splits = 0
            rest$ = intervalText$
            repeat
                # look at the first splitLength characters of what is left over
                check_part$ = left$ (rest$, splitLength)
                # find the last space, where the text may be broken
                splitPos = rindex (check_part$, " ")
                if splitPos = 0
                    # no space available (a very long word): break hard at the
                    # maximum width, otherwise nothing is consumed and the loop
                    # never terminates
                    splitPos = splitLength
                endif
                # store this segment ...
                splits = splits + 1
                textPart$[splits] = left$ (rest$, splitPos)
                # ... and keep the remainder for the next round
                lenRest = length (rest$) - splitPos
                rest$ = right$ (rest$, lenRest)
                # what is left over is short enough: it becomes the last segment
                if length (rest$) <= splitLength
                    splits = splits + 1
                    textPart$[splits] = rest$
                    rest$ = ""
                endif
            until length (rest$) = 0
        endif
        for mySplitNr to splits
            if mySplitNr = 1
                output$ = output$ + lineNumber$ + " " + tierNameOutput$ + " " + textPart$[mySplitNr] + newline$
            else
                # continuation lines get blanks instead of line number and speaker
                output$ = output$ + lineNumberReplacement$ + " " + tierReplacement$ + " " + textPart$[mySplitNr] + newline$
            endif
        endfor
    endif
    tierNameOutput$ = ""
endfor
writeInfoLine: output$

#===================================== clean up
#--- remove table and extract
nocheck selectObject: "Table myTextgridToGatTable"
nocheck Remove
nocheck selectObject: "Table myTextgridToGatTable_extract"
nocheck Remove
#--- remove individual tiers (they only exist when a subset of tiers was exported)
if include_tier_numbers$ <> "-1"
    for i from 1 to number_of_tiers
        nocheck selectObject: tier_'i'
        nocheck Remove
    endfor
endif
#--- remove intermediary TextGrid
nocheck selectObject: "TextGrid newTextGrid"
nocheck Remove
#--- select original TextGrid
nocheck selectObject: "TextGrid " + selectedTextgrid$