surveyer.plotter
"""Report generation and plotting for survey data read by ``DataSet``."""

import os
import re
import shutil
import subprocess

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import yaml
from IPython.display import Markdown, display

from surveyer.surveyreader import DataSet


class ReportMaker:
    """Build tables, plots and full reports from a survey data set.

    Attributes are set in ``__init__``: ``conf`` (parsed YAML configuration
    or ``False``), ``dataset`` (a ``DataSet`` or ``False``) and ``outpath``
    (output directory with a trailing slash, or the empty string).
    """

    def __init__(self, datasetpath="", configpath="", outfilepath=""):
        """Load the optional configuration and data set.

        Args:
            datasetpath: path handed to ``DataSet``; skipped when empty.
            configpath: path of the YAML configuration; skipped when empty.
            outfilepath: output directory; normalised to end in one ``/``.
        """
        self.conf = False
        self.dataset = False
        if outfilepath:
            self.outpath = outfilepath.rstrip("/") + "/"
        else:
            self.outpath = outfilepath

        if configpath:
            with open(configpath, "r", encoding="utf-8") as file:
                self.conf = yaml.safe_load(file)
        if datasetpath:
            self.dataset = DataSet(datasetpath)

    def create_report(self, output="display", writepdf=False):
        """Create the full report described by the configuration (structure.yml).

        Every mapping-valued top-level configuration entry except the
        reserved ``scales`` entry is rendered as one chapter. Inside a
        chapter, every mapping-valued entry is a cluster whose key selects
        the content: ``table``, ``count``, ``compare``, ``rating`` or
        ``multirate`` (matched as substrings of the key).

        Args:
            output: ``"display"`` shows the chapters in a notebook;
                ``"pages"`` writes one markdown file per chapter into
                ``pages/`` below the output path plus an ``index.md``.
            writepdf: when true, also write one summary markdown file and
                convert it to PDF with pandoc. The file name is taken from
                the ``pdfname`` configuration entry.

        Returns:
            None.
        """
        if not self.conf:
            print("You did not provide a config file!")
            return

        header = "# EVERSE software quality landscaping survey\n\n## Survey results\n\n"
        pagesdir = self.outpath + "pages/"
        figuresdir = pagesdir + "figures/"
        indexpage = header
        alltext = header

        if output == "pages":
            os.makedirs(figuresdir, exist_ok=True)

        def add_figure(elements, plotname, caption):
            # Move a freshly saved plot into the figures directory and
            # reference it from the markdown.
            if not plotname:
                return
            # Plots are moved in every output mode, so the directory must
            # exist even when no pages are written.
            os.makedirs(figuresdir, exist_ok=True)
            target = figuresdir + plotname
            if os.path.exists(target):
                os.remove(target)
            shutil.move(plotname, figuresdir)
            # NOTE(review): link format inferred from the "figures/" ->
            # "pages/figures/" rewrite applied for the PDF summary below.
            elements.append("![" + str(caption) + "](figures/" + plotname + ")\n")

        for chap, chapter in self.conf.items():
            # "pdfname" is a plain string and "scales" is a lookup table;
            # neither is a chapter.
            if chap in ("scales", "pdfname") or not isinstance(chapter, dict):
                continue

            # str.lstrip("chap_") strips a character set, not the prefix.
            chapname = str(chap)
            if chapname.startswith("chap_"):
                chapname = chapname[len("chap_"):]
            chaptitle = chapter["title"] if "title" in chapter else chapname

            elements = ["## " + str(chaptitle) + "\n"]
            if "description" in chapter:
                elements.append(chapter["description"] + "\n")

            for subkey, cluster in chapter.items():
                # Chapter-level strings (title, description, filename) are
                # not clusters.
                if not isinstance(cluster, dict):
                    continue
                subkey = str(subkey)
                if "title" in cluster:
                    elements.append("### " + cluster["title"] + "\n")
                if "description" in cluster:
                    elements.append(cluster["description"] + "\n")

                # Per-cluster title: never reuse the previous cluster's.
                plottitle = cluster.get("title", "")
                caption = plottitle or subkey
                altanswers = cluster.get("altoptions", [])

                if "table" in subkey:
                    elements.append(
                        self.make_table(
                            cluster["identifiers"], cluster.get("alttitles", [])
                        )
                        + "\n"
                    )

                if "count" in subkey:
                    counttitle = cluster.get("title", "Counts")
                    plotname = self.make_count_chart(
                        cluster["identifier"],
                        cluster.get("charttype", "bar"),
                        counttitle,
                        altanswers,
                    )
                    add_figure(elements, plotname, counttitle)

                if "compare" in subkey:
                    plotname = self.make_compare_likert(
                        cluster["identifier"],
                        cluster["selectedparts"],
                        plottitle,
                        altanswers,
                    )
                    add_figure(elements, plotname, caption)

                if "rating" in subkey:
                    plotname = self.make_rating(cluster["identifier"], plottitle)
                    add_figure(elements, plotname, caption)

                if "multirate" in subkey:
                    plotname = self.make_multirating(
                        cluster["identifier"], plottitle, cluster.get("exclude", "")
                    )
                    add_figure(elements, plotname, caption)

            if output == "display":
                for element in elements:
                    display(Markdown(element))
            elif output == "pages":
                filename = str(chapter.get("filename", chap))
                with open(pagesdir + filename + ".md", "w", encoding="utf-8") as f:
                    for element in elements:
                        f.write(element + "\n")
                print("Title:", chaptitle)
                indexpage += "- [" + str(chaptitle) + "](pages/" + filename + ".md)\n"

            if writepdf:
                # The summary lives one level above the pages, so figure
                # links need the "pages/" prefix.
                alltext += (
                    "\n\n".join(
                        el.replace("figures/", "pages/figures/") for el in elements
                    )
                    + "\n\n"
                )

        if output == "pages":
            indexpage += "\nBack to the [repository](https://youschnabel.github.io/EVERSE-landscape/), or look up the [code documentation](https://youschnabel.github.io/EVERSE-landscape/pydocs/surveyer.html)!\n"
            indexpage += "\nThis site was built using [GitHub Pages](https://pages.github.com) and Jekyll.\n"
            with open(self.outpath + "index.md", "w", encoding="utf-8") as f:
                f.write(indexpage)

        if writepdf:
            pdffilepath = self.outpath + self.conf.get(
                "pdfname", "EVERSEsurveyresults.pdf"
            )
            # splitext removes the real extension; rstrip(".pdf") would eat
            # any trailing '.', 'p', 'd', 'f' characters of the name.
            mdfilepath = os.path.splitext(pdffilepath)[0] + ".md"
            with open(mdfilepath, "w", encoding="utf-8") as temp_md_file:
                temp_md_file.write(alltext)
            try:
                # Call Pandoc to convert the Markdown file to PDF
                subprocess.run(
                    [
                        "pandoc",
                        mdfilepath,
                        "--from=markdown",
                        "--to=pdf",
                        "--output",
                        pdffilepath,
                        "--template=template.tex",
                    ],
                    check=True,
                )
                print(f"PDF generated successfully: {pdffilepath}")
            except subprocess.CalledProcessError:
                print("Error: Pandoc failed to convert Markdown to PDF.")
            except FileNotFoundError:
                print("Error: Pandoc executable not found.")

    def make_table(self, questionids, alttitles=None):
        """Return a markdown table of the answers to the given questions.

        Rows without any answer are dropped, remaining gaps become empty
        strings and URLs in text cells are turned into markdown links.

        Args:
            questionids: question identifiers passed to
                ``dataset.extract_subset``.
            alttitles: optional replacement column titles; only applied when
                there is exactly one per question id.

        Returns:
            The table as produced by ``DataFrame.to_markdown``.
        """
        acceptedtypes = ["text", "enumerate", "select"]

        df = (
            self.dataset.extract_subset(questionids, acceptedtypes)
            .dropna(how="all")
            .fillna("")
        )

        def linkify(cell):
            # Only strings can carry URLs; leave other cell values untouched.
            return _format_urls_in_text(cell) if isinstance(cell, str) else cell

        # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1.
        if hasattr(pd.DataFrame, "map"):
            df = df.map(linkify)
        else:
            df = df.applymap(linkify)

        if alttitles and len(alttitles) == len(questionids):
            newnames = {}
            for questionid, alttitle in zip(questionids, alttitles):
                newnames.setdefault(
                    self.dataset.metadata[questionid]["question"], alttitle
                )
            df = df.rename(columns=newnames)

        return df.to_markdown()

    def make_count_chart(self, questionid, charttype="bar", title="", altanswers=None):
        """Plot how often each answer of a select/enumerate question occurs.

        Args:
            questionid: identifier of the question to count.
            charttype: ``"pie"`` or any kind accepted by ``Series.plot``
                (for example ``"bar"`` or ``"line"``).
            title: plot title; defaults to ``"Count of responses"``.
            altanswers: optional replacement answer labels; used only when
                their number matches the number of distinct answers.

        Returns:
            The name of the PNG file written to the working directory, or an
            empty string when there is nothing to plot.
        """
        acceptedtypes = ["select", "enumerate"]

        if not title:
            title = "Count of responses"

        df_extract = self.dataset.extract_subset([questionid], acceptedtypes)
        column = df_extract.columns[0]
        answers = df_extract[column].dropna()

        # Look up the type of *this* question (not a fixed one).
        try:
            entrytype = self.dataset.metadata[questionid]["entrytype"]
        except (KeyError, TypeError):
            entrytype = None

        if entrytype == "enumerate":
            # Enumerations hold several "; "-separated answers per response.
            answers = pd.Series(
                [item for entry in answers for item in str(entry).split("; ")],
                name=column,
                dtype=object,
            )

        # Counting the column (a Series) keeps plain answer strings as labels.
        answer_counts = answers.value_counts()
        if answer_counts.empty:
            print("No answers to plot for question", questionid)
            return ""

        labels = None
        if altanswers:
            if len(answer_counts) == len(altanswers):
                labels = list(altanswers)
            else:
                print(
                    "Alternative options length",
                    len(altanswers),
                    "does not match required length",
                    len(answer_counts),
                )

        plt.figure(figsize=(6, 4))
        if charttype == "pie":
            if labels is None:
                labels = [str(answer) for answer in answer_counts.index]
            plt.pie(
                answer_counts,
                labels=labels,
                autopct="%1.1f%%",
                startangle=90,
            )
            plt.xlabel("Number of entries: " + str(answer_counts.sum()))
        else:
            answer_counts.plot(kind=charttype)
            plt.xlabel("Response")
            plt.ylabel("Frequency")
            if labels is None:
                plt.xticks(rotation=40, ha="right")
            else:
                plt.xticks(
                    ticks=range(len(labels)),
                    labels=labels,
                    rotation=40,
                    ha="right",
                )

        plt.title(title)

        filename = (
            "plot_"
            + charttype
            + "_"
            + str(questionid)
            .replace("[", "")
            .replace("]", "")
            .replace(",", "_")
            .replace("'", "")
            + ".png"
        )

        plt.tight_layout()
        plt.savefig(filename)

        return filename

    def make_compare_likert(self, questionid, subquestion, title="", altanswers=None):
        """Plot several Likert-type sub-questions side by side.

        Args:
            questionid: identifier of the question group.
            subquestion: sub-questions containing this string are selected.
            title: plot title; defaults to ``"Estimate"``.
            altanswers: optional x-axis labels replacing the ones derived
                from the remaining sub-question parts.

        Answer order is taken from a matching entry under ``scales`` in the
        configuration when one has the same set of options.

        Returns:
            The name of the PNG file written to the working directory.
        """
        if not title:
            title = "Estimate"

        meta = self.dataset.metadata[questionid]

        columns = []
        altnames = []
        for parts, colname in zip(meta["subquestions"], meta["colnames"]):
            if any(subquestion in part for part in parts):
                columns.append(colname)
                altnames.append([part for part in parts if subquestion not in part])

        if altanswers:
            altnames = altanswers
        else:
            altnames = [" : ".join(str(part) for part in parts) for parts in altnames]

        params = meta["params"]
        plottype = ""
        # Collect into a fresh list so the metadata itself is not modified.
        options = []

        for col in columns:
            if col in params["options"]:
                for opt in params["options"][col]:
                    if opt not in options:
                        options.append(opt)

            if col in params["subtypes"]:
                subtype = params["subtypes"][col]
                if not plottype:
                    plottype = subtype
                elif subtype != plottype:
                    print("Question types don't match!", subtype, plottype)
                    plottype = "mismatch"

        scale = []
        scales = self.conf.get("scales", {}) if isinstance(self.conf, dict) else {}
        for entry in scales.values():
            if sorted(entry["options"]) == sorted(options):
                scale = entry["options"]

        if not scale:
            scale = options

        df = self.dataset.data[columns]
        df_melted = df.melt(var_name=subquestion, value_name="Response")

        # Plot the grouped bar chart
        plt.figure(figsize=(12, 6))
        sns.countplot(
            data=df_melted,
            x=subquestion,
            hue="Response",
            order=df.columns,
            hue_order=scale or None,
            palette="coolwarm",
        )

        plt.title(title)
        plt.xlabel("Question")
        plt.ylabel("Count")
        # Move the legend outside the axes
        plt.legend(title=subquestion, bbox_to_anchor=(1.05, 1), loc="upper left")
        plt.xticks(ticks=range(len(altnames)), labels=altnames)

        plt.tight_layout()

        filename = (
            "plot_" + subquestion.replace(" ", "_") + "_" + str(questionid) + ".png"
        )

        plt.savefig(filename)

        return filename

    def make_rating(self, questionid, title=""):
        """Plot the distribution of a rating question.

        Answers are parsed from the part before the first ``/``; missing or
        unparsable answers are ignored. The mean rating is shown in the
        legend.

        Args:
            questionid: identifier of the rating question.
            title: plot title; defaults to the extracted column name.

        Returns:
            The name of the PNG file written to the working directory.
        """
        df_extract_all = self.dataset.extract_subset([questionid])
        column = df_extract_all.columns[0]

        factor = int(self.dataset.metadata[questionid]["params"]["options"]["factor"])
        all_ratings = list(range(1, factor + 1))

        # Drop missing answers before parsing; unparsable ones become NaN
        # through errors="coerce" and are dropped afterwards.
        raw = df_extract_all[column].dropna()
        ratings = (
            pd.to_numeric(raw.astype(str).str.split("/").str[0], errors="coerce")
            .dropna()
            .astype(int)
        )
        rating_counts = ratings.value_counts().reindex(all_ratings, fill_value=0)

        mean_rating = ratings.mean()

        # Plot the distribution of ratings
        plt.figure(figsize=(8, 6))
        ax = sns.barplot(
            x=rating_counts.index, y=rating_counts.values, palette="viridis"
        )

        # Show the mean as an invisible legend entry
        mean_label = f"Mean: {mean_rating:.2f}"
        handles, _ = ax.get_legend_handles_labels()
        handles.extend([plt.Line2D([0], [0], color="none", label=mean_label)])
        ax.legend(handles=handles, loc="upper right")

        if not title:
            title = column

        plt.title(title)
        plt.xlabel("Rating")
        plt.ylabel("Count")

        filename = "plot_rating_" + str(questionid) + ".png"

        plt.savefig(filename)

        return filename

    def make_multirating(self, questionid, title="", excludeterms=None):
        """Plot mean and standard deviation of several rating sub-questions.

        Args:
            questionid: identifier of the question group.
            title: plot title; a generic default is used when empty.
            excludeterms: a term or list of terms; columns whose name
                contains any of them are left out.

        Returns:
            The name of the PNG file written to the working directory.
        """
        if not excludeterms:
            terms = []
        elif isinstance(excludeterms, str):
            # A bare string is one term, not a sequence of characters.
            terms = [excludeterms]
        else:
            terms = list(excludeterms)

        meta = self.dataset.metadata[questionid]
        columnsall = meta["colnames"]
        columns = [
            col for col in columnsall if not any(term in col for term in terms)
        ]

        def leading_number(value):
            # "4/5" -> 4; also accepts values that are already numeric.
            return int(float(str(value).split("/")[0]))

        # Rows with any missing rating are dropped before parsing.
        df = self.dataset.data[columns].dropna().copy()
        for col in columns:
            df[col] = df[col].map(leading_number)

        altnames = [str(entry[0]) for entry in meta["subquestions"]]

        renamemap = {}
        for col, altname in zip(columnsall, altnames):
            if col in columns:
                renamemap.setdefault(col, altname)
        df = df.rename(columns=renamemap)

        # Calculate mean and standard deviation for each question
        means = df.mean()
        stds = df.std()

        # One horizontal bar per question, error bars show the standard
        # deviation
        plt.figure(figsize=(8, 6))
        plt.barh(
            y=means.index, width=means, xerr=stds, color="skyblue", edgecolor="gray"
        )

        # Add mean values next to bars for clarity
        for index, value in enumerate(means):
            plt.text(value + 0.1, index, f"{value:.2f}", va="center", color="black")

        if not title:
            title = "Mean and Spread of Ratings by Question"
        plt.xlabel("Rating out of " + str(meta["params"]["options"]["factor"]))
        plt.title(title)
        plt.grid(axis="x", linestyle="--", alpha=0.7)

        plt.tight_layout()

        filename = (
            "plot_multirating_"
            + str(questionid)
            + "_"
            + "_".join(str(term) for term in terms)
            + ".png"
        )

        plt.savefig(filename)

        return filename


def _format_urls_in_text(text):
    """Wrap every http(s) URL in *text* in a markdown link.

    Non-string values are returned unchanged.
    """
    if not isinstance(text, str):
        return text
    # Regular expression to match URLs
    url_pattern = r"(https?://[^\s]+)"
    # Replace each URL with Markdown formatted link
    return re.sub(url_pattern, r"[\1](\1)", text)
class ReportMaker:
    """Build tables, plots and full reports from a survey data set.

    Attributes are set in ``__init__``: ``conf`` (parsed YAML configuration
    or ``False``), ``dataset`` (a ``DataSet`` or ``False``) and ``outpath``
    (output directory with a trailing slash, or the empty string).
    """

    def __init__(self, datasetpath="", configpath="", outfilepath=""):
        """Load the optional configuration and data set.

        Args:
            datasetpath: path handed to ``DataSet``; skipped when empty.
            configpath: path of the YAML configuration; skipped when empty.
            outfilepath: output directory; normalised to end in one ``/``.
        """
        self.conf = False
        self.dataset = False
        if outfilepath:
            self.outpath = outfilepath.rstrip("/") + "/"
        else:
            self.outpath = outfilepath

        if configpath:
            with open(configpath, "r", encoding="utf-8") as file:
                self.conf = yaml.safe_load(file)
        if datasetpath:
            self.dataset = DataSet(datasetpath)

    def create_report(self, output="display", writepdf=False):
        """Create the full report described by the configuration (structure.yml).

        Every mapping-valued top-level configuration entry except the
        reserved ``scales`` entry is rendered as one chapter. Inside a
        chapter, every mapping-valued entry is a cluster whose key selects
        the content: ``table``, ``count``, ``compare``, ``rating`` or
        ``multirate`` (matched as substrings of the key).

        Args:
            output: ``"display"`` shows the chapters in a notebook;
                ``"pages"`` writes one markdown file per chapter into
                ``pages/`` below the output path plus an ``index.md``.
            writepdf: when true, also write one summary markdown file and
                convert it to PDF with pandoc. The file name is taken from
                the ``pdfname`` configuration entry.

        Returns:
            None.
        """
        if not self.conf:
            print("You did not provide a config file!")
            return

        header = "# EVERSE software quality landscaping survey\n\n## Survey results\n\n"
        pagesdir = self.outpath + "pages/"
        figuresdir = pagesdir + "figures/"
        indexpage = header
        alltext = header

        if output == "pages":
            os.makedirs(figuresdir, exist_ok=True)

        def add_figure(elements, plotname, caption):
            # Move a freshly saved plot into the figures directory and
            # reference it from the markdown.
            if not plotname:
                return
            # Plots are moved in every output mode, so the directory must
            # exist even when no pages are written.
            os.makedirs(figuresdir, exist_ok=True)
            target = figuresdir + plotname
            if os.path.exists(target):
                os.remove(target)
            shutil.move(plotname, figuresdir)
            # NOTE(review): link format inferred from the "figures/" ->
            # "pages/figures/" rewrite applied for the PDF summary below.
            elements.append("![" + str(caption) + "](figures/" + plotname + ")\n")

        for chap, chapter in self.conf.items():
            # "pdfname" is a plain string and "scales" is a lookup table;
            # neither is a chapter.
            if chap in ("scales", "pdfname") or not isinstance(chapter, dict):
                continue

            # str.lstrip("chap_") strips a character set, not the prefix.
            chapname = str(chap)
            if chapname.startswith("chap_"):
                chapname = chapname[len("chap_"):]
            chaptitle = chapter["title"] if "title" in chapter else chapname

            elements = ["## " + str(chaptitle) + "\n"]
            if "description" in chapter:
                elements.append(chapter["description"] + "\n")

            for subkey, cluster in chapter.items():
                # Chapter-level strings (title, description, filename) are
                # not clusters.
                if not isinstance(cluster, dict):
                    continue
                subkey = str(subkey)
                if "title" in cluster:
                    elements.append("### " + cluster["title"] + "\n")
                if "description" in cluster:
                    elements.append(cluster["description"] + "\n")

                # Per-cluster title: never reuse the previous cluster's.
                plottitle = cluster.get("title", "")
                caption = plottitle or subkey
                altanswers = cluster.get("altoptions", [])

                if "table" in subkey:
                    elements.append(
                        self.make_table(
                            cluster["identifiers"], cluster.get("alttitles", [])
                        )
                        + "\n"
                    )

                if "count" in subkey:
                    counttitle = cluster.get("title", "Counts")
                    plotname = self.make_count_chart(
                        cluster["identifier"],
                        cluster.get("charttype", "bar"),
                        counttitle,
                        altanswers,
                    )
                    add_figure(elements, plotname, counttitle)

                if "compare" in subkey:
                    plotname = self.make_compare_likert(
                        cluster["identifier"],
                        cluster["selectedparts"],
                        plottitle,
                        altanswers,
                    )
                    add_figure(elements, plotname, caption)

                if "rating" in subkey:
                    plotname = self.make_rating(cluster["identifier"], plottitle)
                    add_figure(elements, plotname, caption)

                if "multirate" in subkey:
                    plotname = self.make_multirating(
                        cluster["identifier"], plottitle, cluster.get("exclude", "")
                    )
                    add_figure(elements, plotname, caption)

            if output == "display":
                for element in elements:
                    display(Markdown(element))
            elif output == "pages":
                filename = str(chapter.get("filename", chap))
                with open(pagesdir + filename + ".md", "w", encoding="utf-8") as f:
                    for element in elements:
                        f.write(element + "\n")
                print("Title:", chaptitle)
                indexpage += "- [" + str(chaptitle) + "](pages/" + filename + ".md)\n"

            if writepdf:
                # The summary lives one level above the pages, so figure
                # links need the "pages/" prefix.
                alltext += (
                    "\n\n".join(
                        el.replace("figures/", "pages/figures/") for el in elements
                    )
                    + "\n\n"
                )

        if output == "pages":
            indexpage += "\nBack to the [repository](https://youschnabel.github.io/EVERSE-landscape/), or look up the [code documentation](https://youschnabel.github.io/EVERSE-landscape/pydocs/surveyer.html)!\n"
            indexpage += "\nThis site was built using [GitHub Pages](https://pages.github.com) and Jekyll.\n"
            with open(self.outpath + "index.md", "w", encoding="utf-8") as f:
                f.write(indexpage)

        if writepdf:
            pdffilepath = self.outpath + self.conf.get(
                "pdfname", "EVERSEsurveyresults.pdf"
            )
            # splitext removes the real extension; rstrip(".pdf") would eat
            # any trailing '.', 'p', 'd', 'f' characters of the name.
            mdfilepath = os.path.splitext(pdffilepath)[0] + ".md"
            with open(mdfilepath, "w", encoding="utf-8") as temp_md_file:
                temp_md_file.write(alltext)
            try:
                # Call Pandoc to convert the Markdown file to PDF
                subprocess.run(
                    [
                        "pandoc",
                        mdfilepath,
                        "--from=markdown",
                        "--to=pdf",
                        "--output",
                        pdffilepath,
                        "--template=template.tex",
                    ],
                    check=True,
                )
                print(f"PDF generated successfully: {pdffilepath}")
            except subprocess.CalledProcessError:
                print("Error: Pandoc failed to convert Markdown to PDF.")
            except FileNotFoundError:
                print("Error: Pandoc executable not found.")

    def make_table(self, questionids, alttitles=None):
        """Return a markdown table of the answers to the given questions.

        Rows without any answer are dropped, remaining gaps become empty
        strings and URLs in text cells are turned into markdown links.

        Args:
            questionids: question identifiers passed to
                ``dataset.extract_subset``.
            alttitles: optional replacement column titles; only applied when
                there is exactly one per question id.

        Returns:
            The table as produced by ``DataFrame.to_markdown``.
        """
        acceptedtypes = ["text", "enumerate", "select"]

        df = (
            self.dataset.extract_subset(questionids, acceptedtypes)
            .dropna(how="all")
            .fillna("")
        )

        def linkify(cell):
            # Only strings can carry URLs; leave other cell values untouched.
            return _format_urls_in_text(cell) if isinstance(cell, str) else cell

        # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1.
        if hasattr(pd.DataFrame, "map"):
            df = df.map(linkify)
        else:
            df = df.applymap(linkify)

        if alttitles and len(alttitles) == len(questionids):
            newnames = {}
            for questionid, alttitle in zip(questionids, alttitles):
                newnames.setdefault(
                    self.dataset.metadata[questionid]["question"], alttitle
                )
            df = df.rename(columns=newnames)

        return df.to_markdown()

    def make_count_chart(self, questionid, charttype="bar", title="", altanswers=None):
        """Plot how often each answer of a select/enumerate question occurs.

        Args:
            questionid: identifier of the question to count.
            charttype: ``"pie"`` or any kind accepted by ``Series.plot``
                (for example ``"bar"`` or ``"line"``).
            title: plot title; defaults to ``"Count of responses"``.
            altanswers: optional replacement answer labels; used only when
                their number matches the number of distinct answers.

        Returns:
            The name of the PNG file written to the working directory, or an
            empty string when there is nothing to plot.
        """
        acceptedtypes = ["select", "enumerate"]

        if not title:
            title = "Count of responses"

        df_extract = self.dataset.extract_subset([questionid], acceptedtypes)
        column = df_extract.columns[0]
        answers = df_extract[column].dropna()

        # Look up the type of *this* question (not a fixed one).
        try:
            entrytype = self.dataset.metadata[questionid]["entrytype"]
        except (KeyError, TypeError):
            entrytype = None

        if entrytype == "enumerate":
            # Enumerations hold several "; "-separated answers per response.
            answers = pd.Series(
                [item for entry in answers for item in str(entry).split("; ")],
                name=column,
                dtype=object,
            )

        # Counting the column (a Series) keeps plain answer strings as labels.
        answer_counts = answers.value_counts()
        if answer_counts.empty:
            print("No answers to plot for question", questionid)
            return ""

        labels = None
        if altanswers:
            if len(answer_counts) == len(altanswers):
                labels = list(altanswers)
            else:
                print(
                    "Alternative options length",
                    len(altanswers),
                    "does not match required length",
                    len(answer_counts),
                )

        plt.figure(figsize=(6, 4))
        if charttype == "pie":
            if labels is None:
                labels = [str(answer) for answer in answer_counts.index]
            plt.pie(
                answer_counts,
                labels=labels,
                autopct="%1.1f%%",
                startangle=90,
            )
            plt.xlabel("Number of entries: " + str(answer_counts.sum()))
        else:
            answer_counts.plot(kind=charttype)
            plt.xlabel("Response")
            plt.ylabel("Frequency")
            if labels is None:
                plt.xticks(rotation=40, ha="right")
            else:
                plt.xticks(
                    ticks=range(len(labels)),
                    labels=labels,
                    rotation=40,
                    ha="right",
                )

        plt.title(title)

        filename = (
            "plot_"
            + charttype
            + "_"
            + str(questionid)
            .replace("[", "")
            .replace("]", "")
            .replace(",", "_")
            .replace("'", "")
            + ".png"
        )

        plt.tight_layout()
        plt.savefig(filename)

        return filename

    def make_compare_likert(self, questionid, subquestion, title="", altanswers=None):
        """Plot several Likert-type sub-questions side by side.

        Args:
            questionid: identifier of the question group.
            subquestion: sub-questions containing this string are selected.
            title: plot title; defaults to ``"Estimate"``.
            altanswers: optional x-axis labels replacing the ones derived
                from the remaining sub-question parts.

        Answer order is taken from a matching entry under ``scales`` in the
        configuration when one has the same set of options.

        Returns:
            The name of the PNG file written to the working directory.
        """
        if not title:
            title = "Estimate"

        meta = self.dataset.metadata[questionid]

        columns = []
        altnames = []
        for parts, colname in zip(meta["subquestions"], meta["colnames"]):
            if any(subquestion in part for part in parts):
                columns.append(colname)
                altnames.append([part for part in parts if subquestion not in part])

        if altanswers:
            altnames = altanswers
        else:
            altnames = [" : ".join(str(part) for part in parts) for parts in altnames]

        params = meta["params"]
        plottype = ""
        # Collect into a fresh list so the metadata itself is not modified.
        options = []

        for col in columns:
            if col in params["options"]:
                for opt in params["options"][col]:
                    if opt not in options:
                        options.append(opt)

            if col in params["subtypes"]:
                subtype = params["subtypes"][col]
                if not plottype:
                    plottype = subtype
                elif subtype != plottype:
                    print("Question types don't match!", subtype, plottype)
                    plottype = "mismatch"

        scale = []
        scales = self.conf.get("scales", {}) if isinstance(self.conf, dict) else {}
        for entry in scales.values():
            if sorted(entry["options"]) == sorted(options):
                scale = entry["options"]

        if not scale:
            scale = options

        df = self.dataset.data[columns]
        df_melted = df.melt(var_name=subquestion, value_name="Response")

        # Plot the grouped bar chart
        plt.figure(figsize=(12, 6))
        sns.countplot(
            data=df_melted,
            x=subquestion,
            hue="Response",
            order=df.columns,
            hue_order=scale or None,
            palette="coolwarm",
        )

        plt.title(title)
        plt.xlabel("Question")
        plt.ylabel("Count")
        # Move the legend outside the axes
        plt.legend(title=subquestion, bbox_to_anchor=(1.05, 1), loc="upper left")
        plt.xticks(ticks=range(len(altnames)), labels=altnames)

        plt.tight_layout()

        filename = (
            "plot_" + subquestion.replace(" ", "_") + "_" + str(questionid) + ".png"
        )

        plt.savefig(filename)

        return filename

    def make_rating(self, questionid, title=""):
        """Plot the distribution of a rating question.

        Answers are parsed from the part before the first ``/``; missing or
        unparsable answers are ignored. The mean rating is shown in the
        legend.

        Args:
            questionid: identifier of the rating question.
            title: plot title; defaults to the extracted column name.

        Returns:
            The name of the PNG file written to the working directory.
        """
        df_extract_all = self.dataset.extract_subset([questionid])
        column = df_extract_all.columns[0]

        factor = int(self.dataset.metadata[questionid]["params"]["options"]["factor"])
        all_ratings = list(range(1, factor + 1))

        # Drop missing answers before parsing; unparsable ones become NaN
        # through errors="coerce" and are dropped afterwards.
        raw = df_extract_all[column].dropna()
        ratings = (
            pd.to_numeric(raw.astype(str).str.split("/").str[0], errors="coerce")
            .dropna()
            .astype(int)
        )
        rating_counts = ratings.value_counts().reindex(all_ratings, fill_value=0)

        mean_rating = ratings.mean()

        # Plot the distribution of ratings
        plt.figure(figsize=(8, 6))
        ax = sns.barplot(
            x=rating_counts.index, y=rating_counts.values, palette="viridis"
        )

        # Show the mean as an invisible legend entry
        mean_label = f"Mean: {mean_rating:.2f}"
        handles, _ = ax.get_legend_handles_labels()
        handles.extend([plt.Line2D([0], [0], color="none", label=mean_label)])
        ax.legend(handles=handles, loc="upper right")

        if not title:
            title = column

        plt.title(title)
        plt.xlabel("Rating")
        plt.ylabel("Count")

        filename = "plot_rating_" + str(questionid) + ".png"

        plt.savefig(filename)

        return filename

    def make_multirating(self, questionid, title="", excludeterms=None):
        """Plot mean and standard deviation of several rating sub-questions.

        Args:
            questionid: identifier of the question group.
            title: plot title; a generic default is used when empty.
            excludeterms: a term or list of terms; columns whose name
                contains any of them are left out.

        Returns:
            The name of the PNG file written to the working directory.
        """
        if not excludeterms:
            terms = []
        elif isinstance(excludeterms, str):
            # A bare string is one term, not a sequence of characters.
            terms = [excludeterms]
        else:
            terms = list(excludeterms)

        meta = self.dataset.metadata[questionid]
        columnsall = meta["colnames"]
        columns = [
            col for col in columnsall if not any(term in col for term in terms)
        ]

        def leading_number(value):
            # "4/5" -> 4; also accepts values that are already numeric.
            return int(float(str(value).split("/")[0]))

        # Rows with any missing rating are dropped before parsing.
        df = self.dataset.data[columns].dropna().copy()
        for col in columns:
            df[col] = df[col].map(leading_number)

        altnames = [str(entry[0]) for entry in meta["subquestions"]]

        renamemap = {}
        for col, altname in zip(columnsall, altnames):
            if col in columns:
                renamemap.setdefault(col, altname)
        df = df.rename(columns=renamemap)

        # Calculate mean and standard deviation for each question
        means = df.mean()
        stds = df.std()

        # One horizontal bar per question, error bars show the standard
        # deviation
        plt.figure(figsize=(8, 6))
        plt.barh(
            y=means.index, width=means, xerr=stds, color="skyblue", edgecolor="gray"
        )

        # Add mean values next to bars for clarity
        for index, value in enumerate(means):
            plt.text(value + 0.1, index, f"{value:.2f}", va="center", color="black")

        if not title:
            title = "Mean and Spread of Ratings by Question"
        plt.xlabel("Rating out of " + str(meta["params"]["options"]["factor"]))
        plt.title(title)
        plt.grid(axis="x", linestyle="--", alpha=0.7)

        plt.tight_layout()

        filename = (
            "plot_multirating_"
            + str(questionid)
            + "_"
            + "_".join(str(term) for term in terms)
            + ".png"
        )

        plt.savefig(filename)

        return filename
def __init__(self, datasetpath="", configpath="", outfilepath=""):
    """Set up the report maker.

    ``outfilepath`` is stored in ``self.outpath`` with exactly one trailing
    slash (an empty value is stored unchanged). ``self.conf`` holds the
    parsed YAML configuration and ``self.dataset`` the loaded ``DataSet``;
    each stays ``False`` when the corresponding path is empty.
    """
    # Normalise the output directory first; nothing below depends on it.
    self.outpath = outfilepath.rstrip("/") + "/" if outfilepath else outfilepath

    # False marks "not provided" for both optional inputs.
    self.conf = False
    self.dataset = False

    if configpath:
        with open(configpath, "r") as handle:
            self.conf = yaml.safe_load(handle)

    if datasetpath:
        self.dataset = DataSet(datasetpath)
def create_report(self, output="display", writepdf=False):
    """Create the full report described by the loaded configuration.

    Every top-level configuration key containing ``"chap"`` is treated as a
    chapter; other keys (for example ``scales`` or ``pdfname``) are skipped.
    Inside a chapter, every mapping-valued entry is a section whose key
    selects what is rendered: ``table``, ``count``, ``compare``, ``rating``
    or ``multirate``.

    Args:
        output: ``"display"`` renders each chapter in a notebook through
            ``display(Markdown(...))``; ``"pages"`` writes one Markdown file
            per chapter to ``<outpath>pages/`` plus ``<outpath>index.md``.
        writepdf: When true, all chapters are also collected into one
            Markdown file which is converted to PDF with pandoc. The PDF
            name can be set with the ``pdfname`` configuration key.

    Returns:
        None. Results are displayed or written to disk.
    """
    if not self.conf:
        print("You did not provide a config file!")
        return

    report_heading = (
        "# EVERSE software quality landscaping survey\n\n## Survey results\n\n"
    )
    pages_dir = self.outpath + "pages/"
    figures_dir = pages_dir + "figures/"

    def chapter_title(chap):
        """Return the configured chapter title, else the key without 'chap_'."""
        chapter = self.conf[chap]
        if "title" in chapter:
            return chapter["title"]
        # str.lstrip("chap_") strips a *set of characters*, which would turn
        # "chap_practices" into "ractices"; remove the literal prefix instead.
        if chap.startswith("chap_"):
            return chap[len("chap_"):]
        return chap

    def store_figure(plotname, elements):
        """Move a freshly written plot into the figures folder."""
        if not plotname:
            return
        # The folder is only pre-created in "pages" mode, so make sure it
        # exists before moving (display mode would otherwise fail here).
        os.makedirs(figures_dir, exist_ok=True)
        target = figures_dir + plotname
        if os.path.exists(target):
            os.remove(target)
        shutil.move(plotname, figures_dir)
        # NOTE(review): an empty element is appended, so the figure is never
        # referenced from the page. The "figures/" -> "pages/figures/"
        # rewrite in the PDF branch suggests an image link was intended here
        # - confirm against the intended page layout.
        elements.append("")

    if output == "pages":
        indexpage = report_heading
        os.makedirs(figures_dir, exist_ok=True)  # also creates pages/

    if writepdf:
        alltext = report_heading

    # write pages for all "chapters" defined in the configuration
    for chap in self.conf:
        chapter = self.conf[chap]
        if "chap" not in chap or not isinstance(chapter, dict):
            continue

        # ``title`` is deliberately one running variable: a compare/rating/
        # multirate section without its own title reuses the last value.
        title = chapter_title(chap)
        elements = ["## " + title + "\n"]
        if "description" in chapter:
            elements.append(chapter["description"] + "\n")

        for subkey, cluster in chapter.items():
            # Scalar entries (title, description, filename) are chapter
            # attributes, not sections; ``"title" in "<some text>"`` would be
            # a substring test followed by a failing string index.
            if not isinstance(cluster, dict):
                continue

            if "title" in cluster:
                elements.append("### " + cluster["title"] + "\n")
            if "description" in cluster:
                elements.append(cluster["description"] + "\n")

            if "table" in subkey:
                if "alttitles" not in cluster:
                    table = self.make_table(cluster["identifiers"])
                else:
                    table = self.make_table(
                        cluster["identifiers"], cluster["alttitles"]
                    )
                elements.append(table + "\n")

            if "count" in subkey:
                title = cluster.get("title", "Counts")
                plotname = self.make_count_chart(
                    cluster["identifier"],
                    cluster.get("charttype", "bar"),
                    title,
                    cluster.get("altoptions", []),
                )
                store_figure(plotname, elements)

            if "compare" in subkey:
                title = cluster.get("title", title)
                plotname = self.make_compare_likert(
                    cluster["identifier"],
                    cluster["selectedparts"],
                    title,
                    cluster.get("altoptions", []),
                )
                store_figure(plotname, elements)

            if "rating" in subkey:
                title = cluster.get("title", title)
                plotname = self.make_rating(cluster["identifier"], title)
                store_figure(plotname, elements)

            if "multirate" in subkey:
                title = cluster.get("title", title)
                plotname = self.make_multirating(
                    cluster["identifier"], title, cluster.get("exclude", "")
                )
                store_figure(plotname, elements)

        if output == "display":
            for element in elements:
                display(Markdown(element))
        elif output == "pages":
            filename = chapter.get("filename", chap)
            # Sections may have overwritten ``title``; restore the chapter's.
            title = chapter_title(chap)
            # NOTE(review): the chapter heading is appended a second time at
            # the end of the page (kept from the original) - confirm whether
            # this trailing heading is wanted.
            elements.append("## " + title + "\n")

            page_path = pages_dir + filename + ".md"
            with open(page_path, "w", encoding="utf-8") as f:
                for element in elements:
                    f.write(element + "\n")
            print("Title:", title)
            indexpage += "- [" + str(title) + "](pages/" + filename + ".md)\n"

        if writepdf:
            # The summary file lives one level above pages/, so figure paths
            # need the extra "pages/" component.
            alltext += "\n\n".join(
                el.replace("figures/", "pages/figures/") for el in elements
            )

    if output == "pages":
        indexpage += "\nBack to the [repository](https://youschnabel.github.io/EVERSE-landscape/), or look up the [code documentation](https://youschnabel.github.io/EVERSE-landscape/pydocs/surveyer.html)!\n"
        indexpage += "\nThis site was built using [GitHub Pages](https://pages.github.com) and Jekyll.\n"
        with open(self.outpath + "index.md", "w", encoding="utf-8") as f:
            f.write(indexpage)

    if writepdf:
        pdffilepath = self.outpath + "EVERSEsurveyresults.pdf"
        if "pdfname" in self.conf:
            pdffilepath = self.outpath + self.conf["pdfname"]

        # Swap the literal ".pdf" suffix for ".md". rstrip(".pdf") would
        # strip any trailing '.', 'p', 'd', 'f' characters, and the output
        # prefix must not be prepended a second time.
        if pdffilepath.lower().endswith(".pdf"):
            mdfilepath = pdffilepath[: -len(".pdf")] + ".md"
        else:
            mdfilepath = pdffilepath + ".md"

        # pandoc expects UTF-8 input
        with open(mdfilepath, "w", encoding="utf-8") as temp_md_file:
            temp_md_file.write(alltext)

        try:
            # Call Pandoc to convert the Markdown file to PDF
            subprocess.run(
                [
                    "pandoc",
                    mdfilepath,
                    "--from=markdown",
                    "--to=pdf",
                    "--output",
                    pdffilepath,
                    "--template=template.tex",
                ],
                check=True,
            )
        except FileNotFoundError:
            print("Error: Pandoc executable not found; no PDF was generated.")
        except subprocess.CalledProcessError:
            print("Error: Pandoc failed to convert Markdown to PDF.")
        else:
            print(f"PDF generated successfully: {pdffilepath}")
Creates the full report from the provided configuration (structure.yml) file. Can output to a notebook (display) or as markdown files (pages). If 'writepdf' is set, a summary markdown file and a PDF are produced. You can set the pdfname in the configuration file.
def make_table(self, questionids, alttitles=[]):
    """Render the answers to the given questions as a Markdown table.

    Rows in which none of the selected questions was answered are removed,
    remaining gaps become empty strings, and every cell is passed through
    ``_format_urls_in_text``.

    Args:
        questionids: Question identifiers handed to
            ``self.dataset.extract_subset`` (restricted to the entry types
            "text", "enumerate" and "select").
        alttitles: Optional replacement column titles. They are only applied
            when there is exactly one per question id.

    Returns:
        The table as a Markdown string (``DataFrame.to_markdown``).
    """
    subset = self.dataset.extract_subset(
        questionids, ["text", "enumerate", "select"]
    )
    table = subset.dropna(how="all").fillna("").applymap(_format_urls_in_text)

    if alttitles and len(alttitles) == len(questionids):
        # Columns are renamed via each question's metadata "question" text
        # (presumably the column headers - verify against DataSet).
        renaming = {}
        for qid, alias in zip(questionids, alttitles):
            renaming.setdefault(self.dataset.metadata[qid]["question"], alias)
        table = table.rename(columns=renaming)

    return table.to_markdown()
Produces table from text answers, deleting rows without answers.
def make_count_chart(self, questionid, charttype="bar", title="", altanswers=None):
    """Plot how often each answer of a selection/enumeration question occurs.

    Args:
        questionid: Identifier of the question to evaluate.
        charttype: ``"pie"`` draws a pie chart; any other value is passed as
            ``kind`` to the pandas plot call (e.g. ``"bar"`` or ``"line"``).
        title: Plot title; defaults to "Count of responses".
        altanswers: Optional replacement answer labels. They are only used
            when their number equals the number of distinct answers;
            otherwise a message is printed and the original labels are kept.

    Returns:
        Name of the PNG file written to the current working directory.
    """
    altanswers = altanswers or []
    if not title:
        title = "Count of responses"

    df_extract = self.dataset.extract_subset([questionid], ["select", "enumerate"])
    column = df_extract.columns[0]

    # Look at the entry type of the *requested* question (the original read
    # the hard-coded question "A2"). Enumerations hold several answers per
    # cell separated by "; " and are flattened to one answer per row.
    if self.dataset.metadata[questionid]["entrytype"] == "enumerate":
        answers = [
            item
            for entry in df_extract[column].dropna()
            for item in entry.split("; ")
        ]
        df_extract = pd.DataFrame({column: answers})

    answer_counts = df_extract.value_counts()

    use_altanswers = bool(altanswers) and len(altanswers) == len(answer_counts)
    if altanswers and not use_altanswers:
        print(
            "Alternative options length",
            len(altanswers),
            "does not match required length",
            len(answer_counts),
        )

    plt.figure(figsize=(6, 4))
    if charttype == "pie":
        if use_altanswers:
            labels = altanswers
        else:
            # DataFrame.value_counts indexes by tuples; unwrap them so the
            # pie is drawn (and labelled) even without alternative labels.
            labels = [
                str(key[0]) if isinstance(key, tuple) and len(key) == 1 else str(key)
                for key in answer_counts.index
            ]
        plt.pie(answer_counts, labels=labels, autopct="%1.1f%%", startangle=90)
        plt.xlabel("Number of entries: " + str(answer_counts.sum()))
    else:
        answer_counts.plot(kind=charttype)
        plt.xlabel("Response")
        plt.ylabel("Frequency")
        if use_altanswers:
            plt.xticks(
                ticks=range(len(altanswers)),
                labels=altanswers,
                rotation=40,
                ha="right",
            )
        else:
            plt.xticks(rotation=40, ha="right")

    plt.title(title)

    filename = (
        "plot_"
        + charttype
        + "_"
        + str(questionid)
        .replace("[", "")
        .replace("]", "")
        .replace(",", "_")
        .replace("'", "")
        + ".png"
    )

    plt.tight_layout()
    # Save the chart as an image
    plt.savefig(filename)

    return filename
Produces a graphic showing basic statistics of selection or enumeration questions. Chart type can be 'bar', 'pie' or 'line', and you can provide an alternative title and answer labels.
def make_compare_likert(self, questionid, subquestion, title="", altanswers=None):
    """Plot several Likert-type answers of one question side by side.

    Args:
        questionid: Identifier of the question whose subquestions are
            compared.
        subquestion: Text used to pick columns: a column is used when one
            part of its subquestion description contains this string.
        title: Plot title; defaults to "Estimate".
        altanswers: Optional x-axis labels replacing the labels derived from
            the remaining subquestion parts.

    Returns:
        Name of the PNG file written to the current working directory.

    The answer order (hue order) is taken from the configuration's
    ``scales`` entry when one of its option lists holds exactly the options
    found for the selected columns; otherwise the options are used in the
    order in which they were found.
    """
    if not title:
        title = "Estimate"

    meta = self.dataset.metadata[questionid]

    # Select the columns whose subquestion parts mention ``subquestion`` and
    # keep the other parts of those subquestions as labels.
    columns = []
    remainders = []
    for i, parts in enumerate(meta["subquestions"]):
        otherpart = []
        foundpart = ""
        for entry in parts:
            if subquestion in entry:
                foundpart = meta["colnames"][i]
            else:
                otherpart.append(entry)
        if foundpart:
            columns.append(foundpart)
            remainders.append(otherpart)

    if altanswers:
        altnames = altanswers
    else:
        # Join the parts directly; stripping brackets from str(list) missed
        # the ", " separator Python puts between list items.
        altnames = [" : ".join(str(part) for part in other) for other in remainders]

    params = meta["params"]
    plottype = ""
    # Collect into a fresh list: appending to the list stored in the
    # metadata would silently alter the dataset for later calls.
    options = []
    for col in columns:
        if col in params["options"]:
            for opt in params["options"][col]:
                if opt not in options:
                    options.append(opt)

        if col in params["subtypes"]:
            subtype = params["subtypes"][col]
            if not plottype:
                plottype = subtype
            elif subtype != plottype:
                print("Question types don't match!", subtype, plottype)
                plottype = "mismatch"

    # ``self.conf`` is False when no configuration file was loaded.
    scales = self.conf["scales"] if self.conf and "scales" in self.conf else {}
    scale = []
    for skey in scales:
        if sorted(scales[skey]["options"]) == sorted(options):
            scale = scales[skey]["options"]
    if not scale:
        scale = options

    df = self.dataset.data[columns]
    df_melted = df.melt(var_name=subquestion, value_name="Response")

    # Plot the grouped bar chart
    plt.figure(figsize=(12, 6))
    sns.countplot(
        data=df_melted,
        x=subquestion,
        hue="Response",
        order=df.columns,
        # Let seaborn infer the order when no options are known.
        hue_order=scale or None,
        palette="coolwarm",
    )

    # Add titles and labels
    plt.title(title)
    plt.xlabel("Question")
    plt.ylabel("Count")
    # Move the legend outside the axes
    plt.legend(title=subquestion, bbox_to_anchor=(1.05, 1), loc="upper left")
    plt.xticks(ticks=range(len(altnames)), labels=altnames)

    plt.tight_layout()  # Adjust layout for readability

    filename = (
        "plot_" + subquestion.replace(" ", "_") + "_" + str(questionid) + ".png"
    )

    plt.savefig(filename)

    return filename
Displays several Likert-scale type answers for comparison. Needs subquestion to select all subquestions which contain the provided string. Can be passed title for the plot and answer options to replace the original ones. Scales can be provided under the 'scales' entry in the configuration file.
def make_rating(self, questionid, title=""):
    """Plot the distribution of the answers to one rating question.

    Answers are parsed from the text before the first ``/`` in each entry
    (presumably entries look like "4/5" - verify against the survey export).
    Unanswered and unparsable entries are ignored.

    Args:
        questionid: Identifier of the rating question.
        title: Plot title; defaults to the name of the extracted column.

    Returns:
        Name of the PNG file written to the current working directory.
    """
    # NOTE(review): the original declared the accepted entry types
    # ["rating", "select"] but never passed them to extract_subset; the call
    # is kept unchanged.
    df_extract_all = self.dataset.extract_subset([questionid])
    column = df_extract_all.columns[0]

    max_rating = self.dataset.metadata[questionid]["params"]["options"]["factor"]
    all_ratings = list(range(1, max_rating + 1))

    # Drop unanswered entries *before* parsing: calling ``.split`` on a NaN
    # (float) raised AttributeError in the original.
    answered = df_extract_all[column].dropna()
    leading = answered.astype(str).str.split("/").str[0]
    ratings = pd.to_numeric(leading, errors="coerce").dropna().astype(int)

    # Count every possible rating, including those nobody chose
    rating_counts = ratings.value_counts().reindex(all_ratings, fill_value=0)

    mean_rating = ratings.mean()

    # Plot the distribution of ratings
    plt.figure(figsize=(8, 6))
    ax = sns.barplot(
        x=rating_counts.index, y=rating_counts.values, palette="viridis"
    )

    # Show the mean as a legend entry (an invisible line carries the label)
    mean_label = f"Mean: {mean_rating:.2f}"
    handles, labels = ax.get_legend_handles_labels()
    handles.extend([plt.Line2D([0], [0], color="none", label=mean_label)])
    ax.legend(handles=handles, loc="upper right")

    if not title:
        title = column

    # Adding titles and labels
    plt.title(title)
    plt.xlabel("Rating")
    plt.ylabel("Count")

    filename = "plot_rating_" + str(questionid) + ".png"

    plt.savefig(filename)

    return filename
Displays distribution of a rating question.
def make_multirating(self, questionid, title="", excludeterms=None):
    """Plot mean and standard deviation of several rating subquestions.

    Args:
        questionid: Identifier of the question whose columns are rated.
        title: Plot title; defaults to
            "Mean and Spread of Ratings by Question".
        excludeterms: Columns whose name contains one of these terms are
            left out. A single string is treated as one term; ``None``,
            ``""`` or an empty list exclude nothing.

    Returns:
        Name of the PNG file written to the current working directory.
    """
    # Normalise the exclusion terms. A bare string must not be iterated
    # character by character (create_report passes "" when nothing is
    # configured, and a single configured term may arrive as a string).
    if not excludeterms:
        terms = []
    elif isinstance(excludeterms, str):
        terms = [excludeterms]
    else:
        terms = list(excludeterms)

    meta = self.dataset.metadata[questionid]
    columnsall = meta["colnames"]

    def is_excluded(colname):
        """Tell whether the column name contains any exclusion term."""
        return any(term in colname for term in terms)

    columns = [col for col in columnsall if not is_excluded(col)]

    # Only rows with an answer in every selected column are used. Ratings
    # are parsed from the text before the first "/".
    df = self.dataset.data[columns].dropna()
    # Series.map per column instead of the deprecated DataFrame.applymap
    df = df.apply(lambda column: column.map(lambda x: int(x.split("/")[0])))

    # Label each kept column with the first part of its subquestion
    altnames = [str(entry[0]) for entry in meta["subquestions"]]
    renamemap = {}
    for i, col in enumerate(columnsall):
        if not is_excluded(col):
            renamemap.setdefault(col, altnames[i])
    df = df.rename(columns=renamemap)

    # Calculate mean and standard deviation for each question
    means = df.mean()
    stds = df.std()

    # Plot each question as a horizontal bar with error bars for standard
    # deviation
    plt.figure(figsize=(8, 6))
    plt.barh(
        y=means.index, width=means, xerr=stds, color="skyblue", edgecolor="gray"
    )

    # Add mean values next to bars for clarity
    for index, value in enumerate(means):
        plt.text(value + 0.1, index, f"{value:.2f}", va="center", color="black")

    if not title:
        title = "Mean and Spread of Ratings by Question"
    # Add labels and title
    plt.xlabel("Rating out of " + str(meta["params"]["options"]["factor"]))
    plt.title(title)
    plt.grid(axis="x", linestyle="--", alpha=0.7)

    plt.tight_layout()

    filename = (
        "plot_multirating_"
        + str(questionid)
        + "_"
        + "_".join(str(term) for term in terms)
        + ".png"
    )

    plt.savefig(filename)

    return filename
Displays the distribution of several rating questions.