surveyer.plotter

  1import subprocess
  2import shutil
  3import yaml
  4import os
  5
  6from IPython.display import Markdown, display
  7import matplotlib.pyplot as plt
  8import pandas as pd
  9import seaborn as sns
 10import re
 11
 12from surveyer.surveyreader import DataSet
 13
 14class ReportMaker:
 15    def __init__(self, datasetpath="", configpath="", outfilepath=""):
 16        self.conf = False
 17        self.dataset = False
 18        if outfilepath:
 19            self.outpath = outfilepath.rstrip("/") + "/"
 20        else:
 21            self.outpath = outfilepath
 22
 23        if configpath:
 24            with open(configpath, "r") as file:
 25                self.conf = yaml.safe_load(file)
 26        if datasetpath:
 27            self.dataset = DataSet(datasetpath)
 28
 29    def create_report(self, output="display", writepdf=False):
 30        """Creates the full report from the provided configuration (structure.yml) file.
 31        Can output to notebook (display) or as markdown files (pages).
 32        If 'writepdf' is set, a summary markdown and pdf is produced. You can set the pdfname in the configuration file."""
 33
 34        if not self.conf:
 35            print("You did not provide a config file!")
 36            return
 37
 38        if output == "pages":
 39            indexpage = (
 40                "# EVERSE software quality landscaping survey\n\n## Survey results\n\n"
 41            )
 42
 43            if not os.path.exists(self.outpath + "pages/"):
 44                os.makedirs(self.outpath + "pages/")
 45
 46            if not os.path.exists(self.outpath + "pages/figures/"):
 47                os.makedirs(self.outpath + "pages/figures/")
 48
 49        if writepdf:
 50            alltext = (
 51                "# EVERSE software quality landscaping survey\n\n## Survey results\n\n"
 52            )
 53
 54        # write pages for all "chapters" defined in the configuration
 55        for chap in self.conf:
 56            elements = []
 57            title = ""
 58            if chap.find("chap") > -1:
 59                title = chap.lstrip("chap_")
 60                if "title" in self.conf[chap]:
 61                    title = self.conf[chap]["title"]
 62                    elements.append("## " + title + "\n")
 63                if "description" in self.conf[chap]:
 64                    elements.append(self.conf[chap]["description"] + "\n")
 65                for subkey in self.conf[chap]:
 66                    cluster = self.conf[chap][subkey]
 67                    if "title" in cluster:
 68                        elements.append("### " + cluster["title"] + "\n")
 69                    if "description" in cluster:
 70                        elements.append(cluster["description"] + "\n")
 71                    if "table" in subkey:
 72                        if not "alttitles" in cluster:
 73                            elements.append(
 74                                self.make_table(cluster["identifiers"]) + "\n"
 75                            )
 76                        else:
 77                            elements.append(
 78                                self.make_table(
 79                                    cluster["identifiers"], cluster["alttitles"]
 80                                )
 81                                + "\n"
 82                            )
 83
 84                    if "count" in subkey:
 85                        charttype = "bar"
 86                        title = "Counts"
 87                        altanswers = []
 88                        if "altoptions" in cluster:
 89                            altanswers = cluster["altoptions"]
 90                        if "title" in cluster:
 91                            title = cluster["title"]
 92                        if "charttype" in cluster:
 93                            charttype = cluster["charttype"]
 94                        plotname = self.make_count_chart(
 95                            cluster["identifier"], charttype, title, altanswers
 96                        )
 97                        if plotname:
 98                            if os.path.exists(
 99                                self.outpath + "pages/figures/" + plotname
100                            ):
101                                os.remove(self.outpath + "pages/figures/" + plotname)
102                            shutil.move(plotname, self.outpath + "pages/figures/")
103                        elements.append("![" + title + "](figures/" + plotname + ")")
104
105                    if "compare" in subkey:
106                        selector = ""
107                        altanswers = []
108                        if "title" in cluster:
109                            title = cluster["title"]
110                        if "altoptions" in cluster:
111                            altanswers = cluster["altoptions"]
112                        plotname = self.make_compare_likert(
113                            cluster["identifier"],
114                            cluster["selectedparts"],
115                            title,
116                            altanswers,
117                        )
118                        if plotname:
119                            if os.path.exists(
120                                self.outpath + "pages/figures/" + plotname
121                            ):
122                                os.remove(self.outpath + "pages/figures/" + plotname)
123                            shutil.move(plotname, self.outpath + "pages/figures/")
124                        elements.append("![" + title + "](figures/" + plotname + ")")
125
126                    if "rating" in subkey:
127                        if "title" in cluster:
128                            title = cluster["title"]
129                        plotname = self.make_rating(cluster["identifier"], title)
130                        if plotname:
131                            if os.path.exists(
132                                self.outpath + "pages/figures/" + plotname
133                            ):
134                                os.remove(self.outpath + "pages/figures/" + plotname)
135                            shutil.move(plotname, self.outpath + "pages/figures/")
136                        elements.append("![" + title + "](figures/" + plotname + ")")
137
138                    if "multirate" in subkey:
139                        exclusion = ""
140                        if "title" in cluster:
141                            title = cluster["title"]
142                        if "exclude" in cluster:
143                            exclusion = cluster["exclude"]
144                        plotname = self.make_multirating(
145                            cluster["identifier"], title, exclusion
146                        )
147                        if plotname:
148                            if os.path.exists(
149                                self.outpath + "pages/figures/" + plotname
150                            ):
151                                os.remove(self.outpath + "pages/figures/" + plotname)
152                            shutil.move(plotname, self.outpath + "pages/figures/")
153                        elements.append("![" + title + "](figures/" + plotname + ")")
154
155                if output == "display":
156                    for element in elements:
157                        display(Markdown(element))
158                elif output == "pages":
159                    filename = chap
160                    if "filename" in self.conf[chap]:
161                        filename = self.conf[chap]["filename"]
162                    title = chap.lstrip("chap_")
163                    if "title" in self.conf[chap]:
164                        title = self.conf[chap]["title"]
165                        elements.append("## " + title + "\n")
166
167                    with open(self.outpath + "pages/" + filename + ".md", "w") as f:
168                        for element in elements:
169                            f.write(element + "\n")
170                    print("Title:", title)
171                    indexpage += "- [" + str(title) + "](pages/" + filename + ".md)\n"
172
173                if writepdf:
174                    alltext += "\n\n".join(
175                        [el.replace("figures/", "pages/figures/") for el in elements]
176                    )
177
178        if output == "pages":
179            indexpage += "\nBack to the [repository](https://youschnabel.github.io/EVERSE-landscape/), or look up the [code documentation](https://youschnabel.github.io/EVERSE-landscape/pydocs/surveyer.html)!\n"
180            indexpage += "\nThis site was built using [GitHub Pages](https://pages.github.com) and Jekyll.\n"
181            with open(self.outpath + "index.md", "w") as f:
182                f.write(indexpage)
183
184        if writepdf:
185            pdffilepath = self.outpath + "EVERSEsurveyresults.pdf"
186            if "pdfname" in self.conf:
187                pdffilepath = self.outpath + self.conf["pdfname"]
188            mdfilepath = self.outpath + pdffilepath.rstrip(".pdf") + ".md"
189            with open(mdfilepath, "w") as temp_md_file:
190                temp_md_file.write(alltext)
191            try:
192                # Call Pandoc to convert the Markdown file to PDF
193                subprocess.run(
194                    [
195                        "pandoc",
196                        mdfilepath,
197                        "--from=markdown",
198                        "--to=pdf",
199                        "--output",
200                        pdffilepath,
201                        "--template=template.tex",
202                    ],
203                    check=True,
204                )
205                print(f"PDF generated successfully: {pdffilepath}")
206            except subprocess.CalledProcessError:
207                print("Error: Pandoc failed to convert Markdown to PDF.")
208
209    def make_table(self, questionids, alttitles=[]):
210        """Produces table from text answers, deleting rows without answers."""
211
212        acceptedtypes = ["text", "enumerate", "select"]
213
214        df_all = self.dataset.extract_subset(questionids, acceptedtypes)
215        df = df_all.dropna(how="all").fillna("")
216        df = df.applymap(_format_urls_in_text)
217
218        if alttitles and len(alttitles) == len(questionids):
219            newnames = {}
220            for i in range(len(alttitles)):
221                newnames.setdefault(
222                    self.dataset.metadata[questionids[i]]["question"], alttitles[i]
223                )
224            df = df.rename(columns=newnames)
225
226        return df.to_markdown()
227
228    def make_count_chart(self, questionid, charttype="bar", title="", altanswers=[]):
229        """Produces a graphic to show basic statistic of selection or enumeration questions.
230        Chart type can be 'bar', 'pie' or 'line', and you can provide alternative title and answer lables."""
231
232        acceptedtypes = ["select", "enumerate"]
233
234        if not title:
235            title = "Count of responses"
236
237        df_extract = self.dataset.extract_subset([questionid], acceptedtypes)
238
239        if self.dataset.metadata["A2"]["entrytype"] == "enumerate":
240            df_nona = df_extract.dropna()
241            df_enlisted = [
242                entry.split("; ") for entry in df_nona[df_extract.columns[0]]
243            ]
244            df_extract = pd.DataFrame(
245                {
246                    df_extract.columns[0]: [
247                        item for sublist in df_enlisted for item in sublist
248                    ]
249                }
250            )
251
252        answer_counts = df_extract.value_counts()
253
254        # create the bar chart
255        plt.figure(figsize=(6, 4))
256        if charttype != "pie":
257            answer_counts.plot(kind=charttype)
258            plt.xlabel("Response")
259            plt.ylabel("Frequency")
260            plt.xticks(rotation=40, ha="right")
261        else:
262            plt.xlabel("Number of entries: " + str(answer_counts.sum()))
263
264        plt.title(title)
265
266        if altanswers:
267            if len(answer_counts) == len(altanswers):
268                if charttype != "pie":
269                    plt.xticks(
270                        ticks=range(len(altanswers)),
271                        labels=altanswers,
272                        rotation=40,
273                        ha="right",
274                    )
275                else:
276                    plt.pie(
277                        answer_counts,
278                        labels=altanswers,
279                        autopct="%1.1f%%",
280                        startangle=90,
281                    )
282            else:
283                print(
284                    "Alternative options length",
285                    len(altanswers),
286                    "does not match required length",
287                    len(answer_counts),
288                )
289
290        filename = (
291            "plot_"
292            + charttype
293            + "_"
294            + str(questionid)
295            .replace("[", "")
296            .replace("]", "")
297            .replace(",", "_")
298            .replace("'", "")
299            + ".png"
300        )
301
302        plt.tight_layout()
303        # Save the chart as an image
304        plt.savefig(filename)
305
306        return filename
307
308    def make_compare_likert(self, questionid, subquestion, title="", altanswers=[]):
309        """Displays several Likert-scale type answers for comparison.
310        Needs subquestion to select all subquestions which contain the provided string.
311        Can be passed title for the plot and answer options to replace the original ones.
312        Scales can be provided under the 'scales' entry in the configuration file.
313        """
314        acceptedtypes = ["select"]
315
316        if not title:
317            title = "Estimate"
318
319        # df_extract = self.dataset.extract_subset(questionids, acceptedtypes)
320        # # also extract subquestion
321
322        columns = []
323        altnames = []
324        for i in range(len(self.dataset.metadata[questionid]["subquestions"])):
325            otherpart = []
326            foundpart = ""
327            for entry in self.dataset.metadata[questionid]["subquestions"][i]:
328                if entry.find(subquestion) > -1:
329                    foundpart = self.dataset.metadata[questionid]["colnames"][i]
330                else:
331                    otherpart.append(entry)
332            if foundpart:
333                columns.append(foundpart)
334                altnames.append(otherpart)
335
336        if altanswers:
337            altnames = altanswers
338        else:
339            altnames = [
340                str(ent).replace("['", "").replace("']", "").replace("','", " : ")
341                for ent in altnames
342            ]
343
344        plottype = ""
345        options = ""
346
347        for col in columns:
348            if col in self.dataset.metadata[questionid]["params"]["options"]:
349                if not options:
350                    options = self.dataset.metadata[questionid]["params"]["options"][
351                        col
352                    ]
353                else:
354                    newopts = self.dataset.metadata[questionid]["params"]["options"][
355                        col
356                    ]
357                    for opt in newopts:
358                        if not opt in options:
359                            options.append(opt)
360
361            if col in self.dataset.metadata[questionid]["params"]["subtypes"]:
362                if not plottype:
363                    plottype = self.dataset.metadata[questionid]["params"]["subtypes"][
364                        col
365                    ]
366                else:
367                    if (
368                        self.dataset.metadata[questionid]["params"]["subtypes"][col]
369                        != plottype
370                    ):
371                        print(
372                            "Question types don't match!",
373                            self.dataset.metadata[questionid]["params"]["subtypes"][
374                                col
375                            ],
376                            plottype,
377                        )
378                        plottype = "mismatch"
379
380        scale = []
381
382        if "scales" in self.conf:
383            for skey in self.conf["scales"]:
384                if sorted(self.conf["scales"][skey]["options"]) == sorted(options):
385                    scale = self.conf["scales"][skey]["options"]
386
387        if not scale:
388            scale = options
389
390        df = self.dataset.data[columns]
391
392        df_melted = df.melt(var_name=subquestion, value_name="Response")
393
394        # Plot the grouped bar chart
395        plt.figure(figsize=(12, 6))
396        sns.countplot(
397            data=df_melted,
398            x=subquestion,
399            hue="Response",
400            order=df.columns,
401            hue_order=scale,
402            palette="coolwarm",
403        )
404
405        # Add titles and labels
406        plt.title(title)
407        plt.xlabel("Question")
408        plt.ylabel("Count")
409        plt.legend(
410            title=subquestion, bbox_to_anchor=(1.05, 1), loc="upper left"
411        )  # Move the legend outside
412        # Rotate x-axis labels if neede
413        plt.xticks(ticks=range(len(altnames)), labels=altnames)
414
415        plt.tight_layout()  # Adjust layout for readability
416
417        filename = (
418            "plot_" + subquestion.replace(" ", "_") + "_" + str(questionid) + ".png"
419        )
420
421        plt.savefig(filename)
422
423        return filename
424
425    def make_rating(self, questionid, title=""):
426        """Displays distribution of a rating question."""
427
428        acceptedtypes = ["rating", "select"]
429
430        df_extract_all = self.dataset.extract_subset([questionid])
431        df_extract = df_extract_all[df_extract_all.columns[0]]
432
433        all_ratings = list(
434            range(
435                1, self.dataset.metadata[questionid]["params"]["options"]["factor"] + 1
436            )
437        )
438
439        df_extract = pd.DataFrame(
440            {
441                df_extract_all.columns[0]: [
442                    int(entry.split("/")[0]) for entry in df_extract
443                ]
444            }
445        ).dropna()
446        df_extract = pd.to_numeric(
447            df_extract[df_extract_all.columns[0]], errors="coerce"
448        )
449        rating_counts = df_extract.value_counts().reindex(all_ratings, fill_value=0)
450
451        # Calculate Mean and RMS
452        mean_rating = df_extract.mean()
453
454        # Plot the distribution of ratings
455        plt.figure(figsize=(8, 6))
456        ax = sns.barplot(
457            x=rating_counts.index, y=rating_counts.values, palette="viridis"
458        )
459
460        # Add Mean and RMS to the legend
461        mean_label = f"Mean: {mean_rating:.2f}"
462        handles, labels = ax.get_legend_handles_labels()
463        handles.extend([plt.Line2D([0], [0], color="none", label=mean_label)])
464        ax.legend(handles=handles, loc="upper right")
465
466        if not title:
467            title = df_extract_all.columns[0]
468
469        # Adding titles and labels
470        plt.title(title)
471        plt.xlabel("Rating")
472        plt.ylabel("Count")
473
474        filename = "plot_rating_" + str(questionid) + ".png"
475
476        plt.savefig(filename)
477
478        return filename
479
480    def make_multirating(self, questionid, title="", excludeterms=[]):
481        """Displays distribution of several rating question."""
482
483        columnsall = self.dataset.metadata[questionid]["colnames"]
484
485        columns = []
486        for col in columnsall:
487            dontuse = False
488            for term in excludeterms:
489                if col.find(term) > -1:
490                    dontuse = True
491            if not excludeterms or not dontuse:
492                columns.append(col)
493
494        df = self.dataset.data[columns].dropna()
495        df[columns] = df[columns].applymap(lambda x: int(x.split("/")[0]))
496
497        altnames = [
498            str(entry[0]) for entry in self.dataset.metadata[questionid]["subquestions"]
499        ]
500
501        renamemap = {}
502        for i in range(len(columnsall)):
503            dontuse = False
504            for term in excludeterms:
505                if columnsall[i].find(term) > -1:
506                    dontuse = True
507            if not excludeterms or not dontuse:
508                renamemap.setdefault(columnsall[i], altnames[i])
509        df = df.rename(columns=renamemap)
510
511        # Calculate mean and standard deviation for each question
512        means = df.mean()
513        stds = df.std()
514
515        # Plot each question as a horizontal bar with error bars for standard
516        # deviation
517        plt.figure(figsize=(8, 6))
518        plt.barh(
519            y=means.index, width=means, xerr=stds, color="skyblue", edgecolor="gray"
520        )
521
522        # Add mean values next to bars for clarity
523        for index, value in enumerate(means):
524            plt.text(value + 0.1, index, f"{value:.2f}", va="center", color="black")
525
526        if not title:
527            title = "Mean and Spread of Ratings by Question"
528        # Add labels and title
529        plt.xlabel(
530            "Rating out of "
531            + str(self.dataset.metadata[questionid]["params"]["options"]["factor"])
532        )
533        plt.title(title)
534        plt.grid(axis="x", linestyle="--", alpha=0.7)
535
536        plt.tight_layout()
537
538        filename = (
539            "plot_multirating_"
540            + str(questionid)
541            + "_"
542            + str(excludeterms).lstrip("['").rstrip("']").replace("', '", "_")
543            + ".png"
544        )
545
546        plt.savefig(filename)
547
548        return filename
549
550
551def _format_urls_in_text(text):
552    # Regular expression to match URLs
553    url_pattern = r"(https?://[^\s]+)"
554    # Replace each URL with Markdown formatted link
555    return re.sub(url_pattern, r"[\1](\1)", text)
class ReportMaker:
 15class ReportMaker:
 16    def __init__(self, datasetpath="", configpath="", outfilepath=""):
 17        self.conf = False
 18        self.dataset = False
 19        if outfilepath:
 20            self.outpath = outfilepath.rstrip("/") + "/"
 21        else:
 22            self.outpath = outfilepath
 23
 24        if configpath:
 25            with open(configpath, "r") as file:
 26                self.conf = yaml.safe_load(file)
 27        if datasetpath:
 28            self.dataset = DataSet(datasetpath)
 29
 30    def create_report(self, output="display", writepdf=False):
 31        """Creates the full report from the provided configuration (structure.yml) file.
 32        Can output to notebook (display) or as markdown files (pages).
 33        If 'writepdf' is set, a summary markdown and pdf is produced. You can set the pdfname in the configuration file."""
 34
 35        if not self.conf:
 36            print("You did not provide a config file!")
 37            return
 38
 39        if output == "pages":
 40            indexpage = (
 41                "# EVERSE software quality landscaping survey\n\n## Survey results\n\n"
 42            )
 43
 44            if not os.path.exists(self.outpath + "pages/"):
 45                os.makedirs(self.outpath + "pages/")
 46
 47            if not os.path.exists(self.outpath + "pages/figures/"):
 48                os.makedirs(self.outpath + "pages/figures/")
 49
 50        if writepdf:
 51            alltext = (
 52                "# EVERSE software quality landscaping survey\n\n## Survey results\n\n"
 53            )
 54
 55        # write pages for all "chapters" defined in the configuration
 56        for chap in self.conf:
 57            elements = []
 58            title = ""
 59            if chap.find("chap") > -1:
 60                title = chap.lstrip("chap_")
 61                if "title" in self.conf[chap]:
 62                    title = self.conf[chap]["title"]
 63                    elements.append("## " + title + "\n")
 64                if "description" in self.conf[chap]:
 65                    elements.append(self.conf[chap]["description"] + "\n")
 66                for subkey in self.conf[chap]:
 67                    cluster = self.conf[chap][subkey]
 68                    if "title" in cluster:
 69                        elements.append("### " + cluster["title"] + "\n")
 70                    if "description" in cluster:
 71                        elements.append(cluster["description"] + "\n")
 72                    if "table" in subkey:
 73                        if not "alttitles" in cluster:
 74                            elements.append(
 75                                self.make_table(cluster["identifiers"]) + "\n"
 76                            )
 77                        else:
 78                            elements.append(
 79                                self.make_table(
 80                                    cluster["identifiers"], cluster["alttitles"]
 81                                )
 82                                + "\n"
 83                            )
 84
 85                    if "count" in subkey:
 86                        charttype = "bar"
 87                        title = "Counts"
 88                        altanswers = []
 89                        if "altoptions" in cluster:
 90                            altanswers = cluster["altoptions"]
 91                        if "title" in cluster:
 92                            title = cluster["title"]
 93                        if "charttype" in cluster:
 94                            charttype = cluster["charttype"]
 95                        plotname = self.make_count_chart(
 96                            cluster["identifier"], charttype, title, altanswers
 97                        )
 98                        if plotname:
 99                            if os.path.exists(
100                                self.outpath + "pages/figures/" + plotname
101                            ):
102                                os.remove(self.outpath + "pages/figures/" + plotname)
103                            shutil.move(plotname, self.outpath + "pages/figures/")
104                        elements.append("![" + title + "](figures/" + plotname + ")")
105
106                    if "compare" in subkey:
107                        selector = ""
108                        altanswers = []
109                        if "title" in cluster:
110                            title = cluster["title"]
111                        if "altoptions" in cluster:
112                            altanswers = cluster["altoptions"]
113                        plotname = self.make_compare_likert(
114                            cluster["identifier"],
115                            cluster["selectedparts"],
116                            title,
117                            altanswers,
118                        )
119                        if plotname:
120                            if os.path.exists(
121                                self.outpath + "pages/figures/" + plotname
122                            ):
123                                os.remove(self.outpath + "pages/figures/" + plotname)
124                            shutil.move(plotname, self.outpath + "pages/figures/")
125                        elements.append("![" + title + "](figures/" + plotname + ")")
126
127                    if "rating" in subkey:
128                        if "title" in cluster:
129                            title = cluster["title"]
130                        plotname = self.make_rating(cluster["identifier"], title)
131                        if plotname:
132                            if os.path.exists(
133                                self.outpath + "pages/figures/" + plotname
134                            ):
135                                os.remove(self.outpath + "pages/figures/" + plotname)
136                            shutil.move(plotname, self.outpath + "pages/figures/")
137                        elements.append("![" + title + "](figures/" + plotname + ")")
138
139                    if "multirate" in subkey:
140                        exclusion = ""
141                        if "title" in cluster:
142                            title = cluster["title"]
143                        if "exclude" in cluster:
144                            exclusion = cluster["exclude"]
145                        plotname = self.make_multirating(
146                            cluster["identifier"], title, exclusion
147                        )
148                        if plotname:
149                            if os.path.exists(
150                                self.outpath + "pages/figures/" + plotname
151                            ):
152                                os.remove(self.outpath + "pages/figures/" + plotname)
153                            shutil.move(plotname, self.outpath + "pages/figures/")
154                        elements.append("![" + title + "](figures/" + plotname + ")")
155
156                if output == "display":
157                    for element in elements:
158                        display(Markdown(element))
159                elif output == "pages":
160                    filename = chap
161                    if "filename" in self.conf[chap]:
162                        filename = self.conf[chap]["filename"]
163                    title = chap.lstrip("chap_")
164                    if "title" in self.conf[chap]:
165                        title = self.conf[chap]["title"]
166                        elements.append("## " + title + "\n")
167
168                    with open(self.outpath + "pages/" + filename + ".md", "w") as f:
169                        for element in elements:
170                            f.write(element + "\n")
171                    print("Title:", title)
172                    indexpage += "- [" + str(title) + "](pages/" + filename + ".md)\n"
173
174                if writepdf:
175                    alltext += "\n\n".join(
176                        [el.replace("figures/", "pages/figures/") for el in elements]
177                    )
178
179        if output == "pages":
180            indexpage += "\nBack to the [repository](https://youschnabel.github.io/EVERSE-landscape/), or look up the [code documentation](https://youschnabel.github.io/EVERSE-landscape/pydocs/surveyer.html)!\n"
181            indexpage += "\nThis site was built using [GitHub Pages](https://pages.github.com) and Jekyll.\n"
182            with open(self.outpath + "index.md", "w") as f:
183                f.write(indexpage)
184
185        if writepdf:
186            pdffilepath = self.outpath + "EVERSEsurveyresults.pdf"
187            if "pdfname" in self.conf:
188                pdffilepath = self.outpath + self.conf["pdfname"]
189            mdfilepath = self.outpath + pdffilepath.rstrip(".pdf") + ".md"
190            with open(mdfilepath, "w") as temp_md_file:
191                temp_md_file.write(alltext)
192            try:
193                # Call Pandoc to convert the Markdown file to PDF
194                subprocess.run(
195                    [
196                        "pandoc",
197                        mdfilepath,
198                        "--from=markdown",
199                        "--to=pdf",
200                        "--output",
201                        pdffilepath,
202                        "--template=template.tex",
203                    ],
204                    check=True,
205                )
206                print(f"PDF generated successfully: {pdffilepath}")
207            except subprocess.CalledProcessError:
208                print("Error: Pandoc failed to convert Markdown to PDF.")
209
210    def make_table(self, questionids, alttitles=[]):
211        """Produces table from text answers, deleting rows without answers."""
212
213        acceptedtypes = ["text", "enumerate", "select"]
214
215        df_all = self.dataset.extract_subset(questionids, acceptedtypes)
216        df = df_all.dropna(how="all").fillna("")
217        df = df.applymap(_format_urls_in_text)
218
219        if alttitles and len(alttitles) == len(questionids):
220            newnames = {}
221            for i in range(len(alttitles)):
222                newnames.setdefault(
223                    self.dataset.metadata[questionids[i]]["question"], alttitles[i]
224                )
225            df = df.rename(columns=newnames)
226
227        return df.to_markdown()
228
229    def make_count_chart(self, questionid, charttype="bar", title="", altanswers=[]):
230        """Produces a graphic to show basic statistic of selection or enumeration questions.
231        Chart type can be 'bar', 'pie' or 'line', and you can provide alternative title and answer lables."""
232
233        acceptedtypes = ["select", "enumerate"]
234
235        if not title:
236            title = "Count of responses"
237
238        df_extract = self.dataset.extract_subset([questionid], acceptedtypes)
239
240        if self.dataset.metadata["A2"]["entrytype"] == "enumerate":
241            df_nona = df_extract.dropna()
242            df_enlisted = [
243                entry.split("; ") for entry in df_nona[df_extract.columns[0]]
244            ]
245            df_extract = pd.DataFrame(
246                {
247                    df_extract.columns[0]: [
248                        item for sublist in df_enlisted for item in sublist
249                    ]
250                }
251            )
252
253        answer_counts = df_extract.value_counts()
254
255        # create the bar chart
256        plt.figure(figsize=(6, 4))
257        if charttype != "pie":
258            answer_counts.plot(kind=charttype)
259            plt.xlabel("Response")
260            plt.ylabel("Frequency")
261            plt.xticks(rotation=40, ha="right")
262        else:
263            plt.xlabel("Number of entries: " + str(answer_counts.sum()))
264
265        plt.title(title)
266
267        if altanswers:
268            if len(answer_counts) == len(altanswers):
269                if charttype != "pie":
270                    plt.xticks(
271                        ticks=range(len(altanswers)),
272                        labels=altanswers,
273                        rotation=40,
274                        ha="right",
275                    )
276                else:
277                    plt.pie(
278                        answer_counts,
279                        labels=altanswers,
280                        autopct="%1.1f%%",
281                        startangle=90,
282                    )
283            else:
284                print(
285                    "Alternative options length",
286                    len(altanswers),
287                    "does not match required length",
288                    len(answer_counts),
289                )
290
291        filename = (
292            "plot_"
293            + charttype
294            + "_"
295            + str(questionid)
296            .replace("[", "")
297            .replace("]", "")
298            .replace(",", "_")
299            .replace("'", "")
300            + ".png"
301        )
302
303        plt.tight_layout()
304        # Save the chart as an image
305        plt.savefig(filename)
306
307        return filename
308
309    def make_compare_likert(self, questionid, subquestion, title="", altanswers=[]):
310        """Displays several Likert-scale type answers for comparison.
311        Needs subquestion to select all subquestions which contain the provided string.
312        Can be passed title for the plot and answer options to replace the original ones.
313        Scales can be provided under the 'scales' entry in the configuration file.
314        """
315        acceptedtypes = ["select"]
316
317        if not title:
318            title = "Estimate"
319
320        # df_extract = self.dataset.extract_subset(questionids, acceptedtypes)
321        # # also extract subquestion
322
323        columns = []
324        altnames = []
325        for i in range(len(self.dataset.metadata[questionid]["subquestions"])):
326            otherpart = []
327            foundpart = ""
328            for entry in self.dataset.metadata[questionid]["subquestions"][i]:
329                if entry.find(subquestion) > -1:
330                    foundpart = self.dataset.metadata[questionid]["colnames"][i]
331                else:
332                    otherpart.append(entry)
333            if foundpart:
334                columns.append(foundpart)
335                altnames.append(otherpart)
336
337        if altanswers:
338            altnames = altanswers
339        else:
340            altnames = [
341                str(ent).replace("['", "").replace("']", "").replace("','", " : ")
342                for ent in altnames
343            ]
344
345        plottype = ""
346        options = ""
347
348        for col in columns:
349            if col in self.dataset.metadata[questionid]["params"]["options"]:
350                if not options:
351                    options = self.dataset.metadata[questionid]["params"]["options"][
352                        col
353                    ]
354                else:
355                    newopts = self.dataset.metadata[questionid]["params"]["options"][
356                        col
357                    ]
358                    for opt in newopts:
359                        if not opt in options:
360                            options.append(opt)
361
362            if col in self.dataset.metadata[questionid]["params"]["subtypes"]:
363                if not plottype:
364                    plottype = self.dataset.metadata[questionid]["params"]["subtypes"][
365                        col
366                    ]
367                else:
368                    if (
369                        self.dataset.metadata[questionid]["params"]["subtypes"][col]
370                        != plottype
371                    ):
372                        print(
373                            "Question types don't match!",
374                            self.dataset.metadata[questionid]["params"]["subtypes"][
375                                col
376                            ],
377                            plottype,
378                        )
379                        plottype = "mismatch"
380
381        scale = []
382
383        if "scales" in self.conf:
384            for skey in self.conf["scales"]:
385                if sorted(self.conf["scales"][skey]["options"]) == sorted(options):
386                    scale = self.conf["scales"][skey]["options"]
387
388        if not scale:
389            scale = options
390
391        df = self.dataset.data[columns]
392
393        df_melted = df.melt(var_name=subquestion, value_name="Response")
394
395        # Plot the grouped bar chart
396        plt.figure(figsize=(12, 6))
397        sns.countplot(
398            data=df_melted,
399            x=subquestion,
400            hue="Response",
401            order=df.columns,
402            hue_order=scale,
403            palette="coolwarm",
404        )
405
406        # Add titles and labels
407        plt.title(title)
408        plt.xlabel("Question")
409        plt.ylabel("Count")
410        plt.legend(
411            title=subquestion, bbox_to_anchor=(1.05, 1), loc="upper left"
412        )  # Move the legend outside
413        # Rotate x-axis labels if neede
414        plt.xticks(ticks=range(len(altnames)), labels=altnames)
415
416        plt.tight_layout()  # Adjust layout for readability
417
418        filename = (
419            "plot_" + subquestion.replace(" ", "_") + "_" + str(questionid) + ".png"
420        )
421
422        plt.savefig(filename)
423
424        return filename
425
426    def make_rating(self, questionid, title=""):
427        """Displays distribution of a rating question."""
428
429        acceptedtypes = ["rating", "select"]
430
431        df_extract_all = self.dataset.extract_subset([questionid])
432        df_extract = df_extract_all[df_extract_all.columns[0]]
433
434        all_ratings = list(
435            range(
436                1, self.dataset.metadata[questionid]["params"]["options"]["factor"] + 1
437            )
438        )
439
440        df_extract = pd.DataFrame(
441            {
442                df_extract_all.columns[0]: [
443                    int(entry.split("/")[0]) for entry in df_extract
444                ]
445            }
446        ).dropna()
447        df_extract = pd.to_numeric(
448            df_extract[df_extract_all.columns[0]], errors="coerce"
449        )
450        rating_counts = df_extract.value_counts().reindex(all_ratings, fill_value=0)
451
452        # Calculate Mean and RMS
453        mean_rating = df_extract.mean()
454
455        # Plot the distribution of ratings
456        plt.figure(figsize=(8, 6))
457        ax = sns.barplot(
458            x=rating_counts.index, y=rating_counts.values, palette="viridis"
459        )
460
461        # Add Mean and RMS to the legend
462        mean_label = f"Mean: {mean_rating:.2f}"
463        handles, labels = ax.get_legend_handles_labels()
464        handles.extend([plt.Line2D([0], [0], color="none", label=mean_label)])
465        ax.legend(handles=handles, loc="upper right")
466
467        if not title:
468            title = df_extract_all.columns[0]
469
470        # Adding titles and labels
471        plt.title(title)
472        plt.xlabel("Rating")
473        plt.ylabel("Count")
474
475        filename = "plot_rating_" + str(questionid) + ".png"
476
477        plt.savefig(filename)
478
479        return filename
480
481    def make_multirating(self, questionid, title="", excludeterms=[]):
482        """Displays distribution of several rating question."""
483
484        columnsall = self.dataset.metadata[questionid]["colnames"]
485
486        columns = []
487        for col in columnsall:
488            dontuse = False
489            for term in excludeterms:
490                if col.find(term) > -1:
491                    dontuse = True
492            if not excludeterms or not dontuse:
493                columns.append(col)
494
495        df = self.dataset.data[columns].dropna()
496        df[columns] = df[columns].applymap(lambda x: int(x.split("/")[0]))
497
498        altnames = [
499            str(entry[0]) for entry in self.dataset.metadata[questionid]["subquestions"]
500        ]
501
502        renamemap = {}
503        for i in range(len(columnsall)):
504            dontuse = False
505            for term in excludeterms:
506                if columnsall[i].find(term) > -1:
507                    dontuse = True
508            if not excludeterms or not dontuse:
509                renamemap.setdefault(columnsall[i], altnames[i])
510        df = df.rename(columns=renamemap)
511
512        # Calculate mean and standard deviation for each question
513        means = df.mean()
514        stds = df.std()
515
516        # Plot each question as a horizontal bar with error bars for standard
517        # deviation
518        plt.figure(figsize=(8, 6))
519        plt.barh(
520            y=means.index, width=means, xerr=stds, color="skyblue", edgecolor="gray"
521        )
522
523        # Add mean values next to bars for clarity
524        for index, value in enumerate(means):
525            plt.text(value + 0.1, index, f"{value:.2f}", va="center", color="black")
526
527        if not title:
528            title = "Mean and Spread of Ratings by Question"
529        # Add labels and title
530        plt.xlabel(
531            "Rating out of "
532            + str(self.dataset.metadata[questionid]["params"]["options"]["factor"])
533        )
534        plt.title(title)
535        plt.grid(axis="x", linestyle="--", alpha=0.7)
536
537        plt.tight_layout()
538
539        filename = (
540            "plot_multirating_"
541            + str(questionid)
542            + "_"
543            + str(excludeterms).lstrip("['").rstrip("']").replace("', '", "_")
544            + ".png"
545        )
546
547        plt.savefig(filename)
548
549        return filename
ReportMaker(datasetpath='', configpath='', outfilepath='')
16    def __init__(self, datasetpath="", configpath="", outfilepath=""):
17        self.conf = False
18        self.dataset = False
19        if outfilepath:
20            self.outpath = outfilepath.rstrip("/") + "/"
21        else:
22            self.outpath = outfilepath
23
24        if configpath:
25            with open(configpath, "r") as file:
26                self.conf = yaml.safe_load(file)
27        if datasetpath:
28            self.dataset = DataSet(datasetpath)
conf
dataset
def create_report(self, output='display', writepdf=False):
 30    def create_report(self, output="display", writepdf=False):
 31        """Creates the full report from the provided configuration (structure.yml) file.
 32        Can output to notebook (display) or as markdown files (pages).
 33        If 'writepdf' is set, a summary markdown and pdf is produced. You can set the pdfname in the configuration file."""
 34
 35        if not self.conf:
 36            print("You did not provide a config file!")
 37            return
 38
 39        if output == "pages":
 40            indexpage = (
 41                "# EVERSE software quality landscaping survey\n\n## Survey results\n\n"
 42            )
 43
 44            if not os.path.exists(self.outpath + "pages/"):
 45                os.makedirs(self.outpath + "pages/")
 46
 47            if not os.path.exists(self.outpath + "pages/figures/"):
 48                os.makedirs(self.outpath + "pages/figures/")
 49
 50        if writepdf:
 51            alltext = (
 52                "# EVERSE software quality landscaping survey\n\n## Survey results\n\n"
 53            )
 54
 55        # write pages for all "chapters" defined in the configuration
 56        for chap in self.conf:
 57            elements = []
 58            title = ""
 59            if chap.find("chap") > -1:
 60                title = chap.lstrip("chap_")
 61                if "title" in self.conf[chap]:
 62                    title = self.conf[chap]["title"]
 63                    elements.append("## " + title + "\n")
 64                if "description" in self.conf[chap]:
 65                    elements.append(self.conf[chap]["description"] + "\n")
 66                for subkey in self.conf[chap]:
 67                    cluster = self.conf[chap][subkey]
 68                    if "title" in cluster:
 69                        elements.append("### " + cluster["title"] + "\n")
 70                    if "description" in cluster:
 71                        elements.append(cluster["description"] + "\n")
 72                    if "table" in subkey:
 73                        if not "alttitles" in cluster:
 74                            elements.append(
 75                                self.make_table(cluster["identifiers"]) + "\n"
 76                            )
 77                        else:
 78                            elements.append(
 79                                self.make_table(
 80                                    cluster["identifiers"], cluster["alttitles"]
 81                                )
 82                                + "\n"
 83                            )
 84
 85                    if "count" in subkey:
 86                        charttype = "bar"
 87                        title = "Counts"
 88                        altanswers = []
 89                        if "altoptions" in cluster:
 90                            altanswers = cluster["altoptions"]
 91                        if "title" in cluster:
 92                            title = cluster["title"]
 93                        if "charttype" in cluster:
 94                            charttype = cluster["charttype"]
 95                        plotname = self.make_count_chart(
 96                            cluster["identifier"], charttype, title, altanswers
 97                        )
 98                        if plotname:
 99                            if os.path.exists(
100                                self.outpath + "pages/figures/" + plotname
101                            ):
102                                os.remove(self.outpath + "pages/figures/" + plotname)
103                            shutil.move(plotname, self.outpath + "pages/figures/")
104                        elements.append("![" + title + "](figures/" + plotname + ")")
105
106                    if "compare" in subkey:
107                        selector = ""
108                        altanswers = []
109                        if "title" in cluster:
110                            title = cluster["title"]
111                        if "altoptions" in cluster:
112                            altanswers = cluster["altoptions"]
113                        plotname = self.make_compare_likert(
114                            cluster["identifier"],
115                            cluster["selectedparts"],
116                            title,
117                            altanswers,
118                        )
119                        if plotname:
120                            if os.path.exists(
121                                self.outpath + "pages/figures/" + plotname
122                            ):
123                                os.remove(self.outpath + "pages/figures/" + plotname)
124                            shutil.move(plotname, self.outpath + "pages/figures/")
125                        elements.append("![" + title + "](figures/" + plotname + ")")
126
127                    if "rating" in subkey:
128                        if "title" in cluster:
129                            title = cluster["title"]
130                        plotname = self.make_rating(cluster["identifier"], title)
131                        if plotname:
132                            if os.path.exists(
133                                self.outpath + "pages/figures/" + plotname
134                            ):
135                                os.remove(self.outpath + "pages/figures/" + plotname)
136                            shutil.move(plotname, self.outpath + "pages/figures/")
137                        elements.append("![" + title + "](figures/" + plotname + ")")
138
139                    if "multirate" in subkey:
140                        exclusion = ""
141                        if "title" in cluster:
142                            title = cluster["title"]
143                        if "exclude" in cluster:
144                            exclusion = cluster["exclude"]
145                        plotname = self.make_multirating(
146                            cluster["identifier"], title, exclusion
147                        )
148                        if plotname:
149                            if os.path.exists(
150                                self.outpath + "pages/figures/" + plotname
151                            ):
152                                os.remove(self.outpath + "pages/figures/" + plotname)
153                            shutil.move(plotname, self.outpath + "pages/figures/")
154                        elements.append("![" + title + "](figures/" + plotname + ")")
155
156                if output == "display":
157                    for element in elements:
158                        display(Markdown(element))
159                elif output == "pages":
160                    filename = chap
161                    if "filename" in self.conf[chap]:
162                        filename = self.conf[chap]["filename"]
163                    title = chap.lstrip("chap_")
164                    if "title" in self.conf[chap]:
165                        title = self.conf[chap]["title"]
166                        elements.append("## " + title + "\n")
167
168                    with open(self.outpath + "pages/" + filename + ".md", "w") as f:
169                        for element in elements:
170                            f.write(element + "\n")
171                    print("Title:", title)
172                    indexpage += "- [" + str(title) + "](pages/" + filename + ".md)\n"
173
174                if writepdf:
175                    alltext += "\n\n".join(
176                        [el.replace("figures/", "pages/figures/") for el in elements]
177                    )
178
179        if output == "pages":
180            indexpage += "\nBack to the [repository](https://youschnabel.github.io/EVERSE-landscape/), or look up the [code documentation](https://youschnabel.github.io/EVERSE-landscape/pydocs/surveyer.html)!\n"
181            indexpage += "\nThis site was built using [GitHub Pages](https://pages.github.com) and Jekyll.\n"
182            with open(self.outpath + "index.md", "w") as f:
183                f.write(indexpage)
184
185        if writepdf:
186            pdffilepath = self.outpath + "EVERSEsurveyresults.pdf"
187            if "pdfname" in self.conf:
188                pdffilepath = self.outpath + self.conf["pdfname"]
189            mdfilepath = self.outpath + pdffilepath.rstrip(".pdf") + ".md"
190            with open(mdfilepath, "w") as temp_md_file:
191                temp_md_file.write(alltext)
192            try:
193                # Call Pandoc to convert the Markdown file to PDF
194                subprocess.run(
195                    [
196                        "pandoc",
197                        mdfilepath,
198                        "--from=markdown",
199                        "--to=pdf",
200                        "--output",
201                        pdffilepath,
202                        "--template=template.tex",
203                    ],
204                    check=True,
205                )
206                print(f"PDF generated successfully: {pdffilepath}")
207            except subprocess.CalledProcessError:
208                print("Error: Pandoc failed to convert Markdown to PDF.")

Creates the full report from the provided configuration (structure.yml) file. Can output to the notebook (display) or as markdown files (pages). If 'writepdf' is set, a summary markdown file and a PDF are produced. You can set the pdfname in the configuration file.

def make_table(self, questionids, alttitles=[]):
210    def make_table(self, questionids, alttitles=[]):
211        """Produces table from text answers, deleting rows without answers."""
212
213        acceptedtypes = ["text", "enumerate", "select"]
214
215        df_all = self.dataset.extract_subset(questionids, acceptedtypes)
216        df = df_all.dropna(how="all").fillna("")
217        df = df.applymap(_format_urls_in_text)
218
219        if alttitles and len(alttitles) == len(questionids):
220            newnames = {}
221            for i in range(len(alttitles)):
222                newnames.setdefault(
223                    self.dataset.metadata[questionids[i]]["question"], alttitles[i]
224                )
225            df = df.rename(columns=newnames)
226
227        return df.to_markdown()

Produces table from text answers, deleting rows without answers.

def make_count_chart(self, questionid, charttype='bar', title='', altanswers=[]):
229    def make_count_chart(self, questionid, charttype="bar", title="", altanswers=[]):
230        """Produces a graphic to show basic statistic of selection or enumeration questions.
231        Chart type can be 'bar', 'pie' or 'line', and you can provide alternative title and answer lables."""
232
233        acceptedtypes = ["select", "enumerate"]
234
235        if not title:
236            title = "Count of responses"
237
238        df_extract = self.dataset.extract_subset([questionid], acceptedtypes)
239
240        if self.dataset.metadata["A2"]["entrytype"] == "enumerate":
241            df_nona = df_extract.dropna()
242            df_enlisted = [
243                entry.split("; ") for entry in df_nona[df_extract.columns[0]]
244            ]
245            df_extract = pd.DataFrame(
246                {
247                    df_extract.columns[0]: [
248                        item for sublist in df_enlisted for item in sublist
249                    ]
250                }
251            )
252
253        answer_counts = df_extract.value_counts()
254
255        # create the bar chart
256        plt.figure(figsize=(6, 4))
257        if charttype != "pie":
258            answer_counts.plot(kind=charttype)
259            plt.xlabel("Response")
260            plt.ylabel("Frequency")
261            plt.xticks(rotation=40, ha="right")
262        else:
263            plt.xlabel("Number of entries: " + str(answer_counts.sum()))
264
265        plt.title(title)
266
267        if altanswers:
268            if len(answer_counts) == len(altanswers):
269                if charttype != "pie":
270                    plt.xticks(
271                        ticks=range(len(altanswers)),
272                        labels=altanswers,
273                        rotation=40,
274                        ha="right",
275                    )
276                else:
277                    plt.pie(
278                        answer_counts,
279                        labels=altanswers,
280                        autopct="%1.1f%%",
281                        startangle=90,
282                    )
283            else:
284                print(
285                    "Alternative options length",
286                    len(altanswers),
287                    "does not match required length",
288                    len(answer_counts),
289                )
290
291        filename = (
292            "plot_"
293            + charttype
294            + "_"
295            + str(questionid)
296            .replace("[", "")
297            .replace("]", "")
298            .replace(",", "_")
299            .replace("'", "")
300            + ".png"
301        )
302
303        plt.tight_layout()
304        # Save the chart as an image
305        plt.savefig(filename)
306
307        return filename

Produces a graphic to show basic statistics of selection or enumeration questions. Chart type can be 'bar', 'pie' or 'line', and you can provide an alternative title and alternative answer labels.

def make_compare_likert(self, questionid, subquestion, title='', altanswers=[]):
309    def make_compare_likert(self, questionid, subquestion, title="", altanswers=[]):
310        """Displays several Likert-scale type answers for comparison.
311        Needs subquestion to select all subquestions which contain the provided string.
312        Can be passed title for the plot and answer options to replace the original ones.
313        Scales can be provided under the 'scales' entry in the configuration file.
314        """
315        acceptedtypes = ["select"]
316
317        if not title:
318            title = "Estimate"
319
320        # df_extract = self.dataset.extract_subset(questionids, acceptedtypes)
321        # # also extract subquestion
322
323        columns = []
324        altnames = []
325        for i in range(len(self.dataset.metadata[questionid]["subquestions"])):
326            otherpart = []
327            foundpart = ""
328            for entry in self.dataset.metadata[questionid]["subquestions"][i]:
329                if entry.find(subquestion) > -1:
330                    foundpart = self.dataset.metadata[questionid]["colnames"][i]
331                else:
332                    otherpart.append(entry)
333            if foundpart:
334                columns.append(foundpart)
335                altnames.append(otherpart)
336
337        if altanswers:
338            altnames = altanswers
339        else:
340            altnames = [
341                str(ent).replace("['", "").replace("']", "").replace("','", " : ")
342                for ent in altnames
343            ]
344
345        plottype = ""
346        options = ""
347
348        for col in columns:
349            if col in self.dataset.metadata[questionid]["params"]["options"]:
350                if not options:
351                    options = self.dataset.metadata[questionid]["params"]["options"][
352                        col
353                    ]
354                else:
355                    newopts = self.dataset.metadata[questionid]["params"]["options"][
356                        col
357                    ]
358                    for opt in newopts:
359                        if not opt in options:
360                            options.append(opt)
361
362            if col in self.dataset.metadata[questionid]["params"]["subtypes"]:
363                if not plottype:
364                    plottype = self.dataset.metadata[questionid]["params"]["subtypes"][
365                        col
366                    ]
367                else:
368                    if (
369                        self.dataset.metadata[questionid]["params"]["subtypes"][col]
370                        != plottype
371                    ):
372                        print(
373                            "Question types don't match!",
374                            self.dataset.metadata[questionid]["params"]["subtypes"][
375                                col
376                            ],
377                            plottype,
378                        )
379                        plottype = "mismatch"
380
381        scale = []
382
383        if "scales" in self.conf:
384            for skey in self.conf["scales"]:
385                if sorted(self.conf["scales"][skey]["options"]) == sorted(options):
386                    scale = self.conf["scales"][skey]["options"]
387
388        if not scale:
389            scale = options
390
391        df = self.dataset.data[columns]
392
393        df_melted = df.melt(var_name=subquestion, value_name="Response")
394
395        # Plot the grouped bar chart
396        plt.figure(figsize=(12, 6))
397        sns.countplot(
398            data=df_melted,
399            x=subquestion,
400            hue="Response",
401            order=df.columns,
402            hue_order=scale,
403            palette="coolwarm",
404        )
405
406        # Add titles and labels
407        plt.title(title)
408        plt.xlabel("Question")
409        plt.ylabel("Count")
410        plt.legend(
411            title=subquestion, bbox_to_anchor=(1.05, 1), loc="upper left"
412        )  # Move the legend outside
413        # Rotate x-axis labels if neede
414        plt.xticks(ticks=range(len(altnames)), labels=altnames)
415
416        plt.tight_layout()  # Adjust layout for readability
417
418        filename = (
419            "plot_" + subquestion.replace(" ", "_") + "_" + str(questionid) + ".png"
420        )
421
422        plt.savefig(filename)
423
424        return filename

Displays several Likert-scale type answers for comparison. Needs subquestion to select all subquestions which contain the provided string. Can be passed title for the plot and answer options to replace the original ones. Scales can be provided under the 'scales' entry in the configuration file.

def make_rating(self, questionid, title=''):
426    def make_rating(self, questionid, title=""):
427        """Displays distribution of a rating question."""
428
429        acceptedtypes = ["rating", "select"]
430
431        df_extract_all = self.dataset.extract_subset([questionid])
432        df_extract = df_extract_all[df_extract_all.columns[0]]
433
434        all_ratings = list(
435            range(
436                1, self.dataset.metadata[questionid]["params"]["options"]["factor"] + 1
437            )
438        )
439
440        df_extract = pd.DataFrame(
441            {
442                df_extract_all.columns[0]: [
443                    int(entry.split("/")[0]) for entry in df_extract
444                ]
445            }
446        ).dropna()
447        df_extract = pd.to_numeric(
448            df_extract[df_extract_all.columns[0]], errors="coerce"
449        )
450        rating_counts = df_extract.value_counts().reindex(all_ratings, fill_value=0)
451
452        # Calculate Mean and RMS
453        mean_rating = df_extract.mean()
454
455        # Plot the distribution of ratings
456        plt.figure(figsize=(8, 6))
457        ax = sns.barplot(
458            x=rating_counts.index, y=rating_counts.values, palette="viridis"
459        )
460
461        # Add Mean and RMS to the legend
462        mean_label = f"Mean: {mean_rating:.2f}"
463        handles, labels = ax.get_legend_handles_labels()
464        handles.extend([plt.Line2D([0], [0], color="none", label=mean_label)])
465        ax.legend(handles=handles, loc="upper right")
466
467        if not title:
468            title = df_extract_all.columns[0]
469
470        # Adding titles and labels
471        plt.title(title)
472        plt.xlabel("Rating")
473        plt.ylabel("Count")
474
475        filename = "plot_rating_" + str(questionid) + ".png"
476
477        plt.savefig(filename)
478
479        return filename

Displays the distribution of answers to a rating question.

def make_multirating(self, questionid, title='', excludeterms=[]):
481    def make_multirating(self, questionid, title="", excludeterms=[]):
482        """Displays distribution of several rating question."""
483
484        columnsall = self.dataset.metadata[questionid]["colnames"]
485
486        columns = []
487        for col in columnsall:
488            dontuse = False
489            for term in excludeterms:
490                if col.find(term) > -1:
491                    dontuse = True
492            if not excludeterms or not dontuse:
493                columns.append(col)
494
495        df = self.dataset.data[columns].dropna()
496        df[columns] = df[columns].applymap(lambda x: int(x.split("/")[0]))
497
498        altnames = [
499            str(entry[0]) for entry in self.dataset.metadata[questionid]["subquestions"]
500        ]
501
502        renamemap = {}
503        for i in range(len(columnsall)):
504            dontuse = False
505            for term in excludeterms:
506                if columnsall[i].find(term) > -1:
507                    dontuse = True
508            if not excludeterms or not dontuse:
509                renamemap.setdefault(columnsall[i], altnames[i])
510        df = df.rename(columns=renamemap)
511
512        # Calculate mean and standard deviation for each question
513        means = df.mean()
514        stds = df.std()
515
516        # Plot each question as a horizontal bar with error bars for standard
517        # deviation
518        plt.figure(figsize=(8, 6))
519        plt.barh(
520            y=means.index, width=means, xerr=stds, color="skyblue", edgecolor="gray"
521        )
522
523        # Add mean values next to bars for clarity
524        for index, value in enumerate(means):
525            plt.text(value + 0.1, index, f"{value:.2f}", va="center", color="black")
526
527        if not title:
528            title = "Mean and Spread of Ratings by Question"
529        # Add labels and title
530        plt.xlabel(
531            "Rating out of "
532            + str(self.dataset.metadata[questionid]["params"]["options"]["factor"])
533        )
534        plt.title(title)
535        plt.grid(axis="x", linestyle="--", alpha=0.7)
536
537        plt.tight_layout()
538
539        filename = (
540            "plot_multirating_"
541            + str(questionid)
542            + "_"
543            + str(excludeterms).lstrip("['").rstrip("']").replace("', '", "_")
544            + ".png"
545        )
546
547        plt.savefig(filename)
548
549        return filename

Displays the distribution of several rating questions.