summaryrefslogtreecommitdiff
path: root/deloopify.c
blob: 5006604b9c61e7bb6b3c139a8fb3e6c2e8819aab (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
/*
 * deloopify, a loop compactor for vbtracetool generated traces
 *
 * Copyright 2007 Stuart Bennett <sb476@cam.ac.uk>
 *
 * This program is released under the terms of the GNU General Public License, version 2
 */

#include <pci/pci.h>
#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* length of sliding history of line beginnings */
#define MAX_LOOP_LEN 32768

/* size in bytes of one entry in the sliding history; an entry holds either
 * the 9-character cs:ip prefix of an instruction line or a loop contraction
 * message (copied with a 100-byte limit), so this must not drop below 100
 */
#define SIG_LEN 128

/*
 * Peek ahead in the trace to decide whether a loop instruction fell through.
 *
 * tracef:    trace file, positioned just after a loop instruction line
 * loopendip: ip of that loop instruction
 *
 * Scans forward to the next instruction line (a line with ':' at column 4)
 * and compares its ip with loopendip + 2; a match implies execution fell out
 * of the loop. The file position is restored and the eof/error indicators
 * are cleared before returning, so the caller can carry on reading (and
 * writing out) the remainder of the file.
 *
 * Returns true if the loop ended, or if eof was hit before any further
 * instruction line was found; false if the trace looped.
 */
bool loop_concluded(FILE *tracef, uint16_t loopendip)
{
	fpos_t loopendpos;
	char line[180];
	bool found = false;

	fgetpos(tracef, &loopendpos);
	while (fgets(line, sizeof line, tracef)) {
		/* check the length first: on a short line, line[4] would be
		 * whatever an earlier read left behind in the buffer
		 */
		if (strlen(line) > 4 && line[4] == ':') {
			found = true;
			break;
		}
	}
	fsetpos(tracef, &loopendpos);
	clearerr(tracef);

	/* eof: there is no next instruction, so behave as if the loop ended
	 * rather than parsing a buffer that holds no instruction line
	 */
	if (!found)
		return true;

	/* next ip == loop ip + 2 means we fell through; anything else looped */
	return strtoul(&line[5], NULL, 16) == (unsigned long)loopendip + 2;
}

/*
 * Entry point: compacts repeated loop iterations in a trace file.
 *
 * Usage: deloopify <tracefile>
 *
 * Reads the trace named by argv[1] and writes "<tracefile>-deloopd". Every
 * input line is copied to the output; instruction lines (':' at column 4)
 * are padded to column 58 and tagged with their input line number. When an
 * instruction line contains "LOOP" and the trace really does loop, the start
 * of the loop is looked up in a sliding history of cs:ip signatures, and any
 * following iterations of identical length are skipped in the input and
 * replaced in the output by a single "Loop of last N lines repeats a
 * further M times" message.
 *
 * Returns 0 on success. Exits with EXIT_FAILURE on bad usage, an over-long
 * file name, file open failure or allocation failure, and returns
 * EXIT_FAILURE if the output file could not be written completely.
 */
int main(int argc, char *argv[])
{
	FILE *tracef, *outf;
	long inlineno = 0, outlineno = -1;
	typedef char asig[SIG_LEN];
	asig *sig;
	char loopsig[] = "Loop";
	bool borken = false;

	if (argc < 2 || !argv[1]) {
		printf("Usage: deloopify <tracefile>\n");
		exit(EXIT_FAILURE);
	}

	/* bounded formatting: an over-long input name must not overflow outname */
	char outname[1024];
	int outnamelen = snprintf(outname, sizeof outname, "%s-deloopd", argv[1]);
	if (outnamelen < 0 || (size_t)outnamelen >= sizeof outname) {
		printf("Input file name too long\n");
		exit(EXIT_FAILURE);
	}
	if (!(tracef = fopen(argv[1], "r"))) {
		printf("File open failed\n");
		exit(EXIT_FAILURE);
	}
	if (!(outf = fopen(outname, "w"))) {
		printf("File open failed\n");
		exit(EXIT_FAILURE);
	}

	if (!(sig = calloc(MAX_LOOP_LEN, SIG_LEN))) {
		printf("Signature table calloc failed\n");
		exit(EXIT_FAILURE);
	}

	while (!feof(tracef)) {
		char line[180];

		if (!fgets(line, sizeof line, tracef))
			break;	// eof
		inlineno += 1;
		outlineno += 1;

		/* instruction lines begin cs:ip, i.e. have ':' at column 4; check
		 * the length first so short lines never read stale buffer contents
		 */
		bool is_instr = strlen(line) > 4 && line[4] == ':';

		/* sanity check trace - every fourth line should be an instruction */
		if (!borken && !(inlineno % 4) && !is_instr) {
			printf("Oh dear, an opcode seems to have broken at line %ld -- that's bad iff the input trace file is unmodified vbtracetool output\n", inlineno);
			borken = true;
		}

		/* append line number of input file to instructions */
		if (is_instr) {
			/* index of the newline, or of the terminator if the last
			 * line of the file has no newline
			 */
			size_t instrstrlen = strcspn(line, "\n");
			/* pad with spaces up to the tag column; text at or beyond
			 * column 58 is overwritten by the tag, and only padding when
			 * the line is shorter keeps the memset length from going negative
			 */
			if (instrstrlen < 58)
				memset(&line[instrstrlen], 0x20, 58 - instrstrlen);
			sprintf(&line[58], "	l%08ld\n", inlineno);
		}
		fwrite(line, strlen(line), 1, outf);

		/* lines which don't begin cs:ip are not interesting */
		if (!is_instr) {
			sig[outlineno % MAX_LOOP_LEN][0] = 0;
			continue;
		}

		/* we maintain a sliding history of previous line beginnings in sig,
		 * so we can search for the cs:ip of where the loop began
		 * (a tagged instruction line is always longer than 9 characters)
		 */
		memcpy(sig[outlineno % MAX_LOOP_LEN], line, 9);
		/* this is only necessary if you want intelligible debug printing */
//sig[outlineno % MAX_LOOP_LEN][9] = 0;

		if (strstr(line, "LOOP")) {
			uint16_t loopendip = strtoul(&line[5], NULL, 16);

			/* if the trace doesn't loop, there's nothing to do */
			if (loop_concluded(tracef, loopendip))
				goto next;

//fprintf(outf, "loop\n");
			/* adapt the loop ip detection for LOOP, LOOPE and LOOPNE opcodes */
			int jumpipofs = 0;
			switch (line[34]) {
			case 'E':
				jumpipofs += 1;
				break;
			case 'N':
				jumpipofs += 2;
				break;
			default:
				break;
			}

			uint16_t loopcs = strtoul(&line[0], NULL, 16);
			uint16_t loopstartip;
			/* for some reason, the loop ip is occasionally 8 bytes */
			if ((strtoul(&line[35], NULL, 16) & 0xffff0000) == 0xffff0000)
				loopstartip = strtoul(&line[39 + jumpipofs], NULL, 16);
			else
				loopstartip = strtoul(&line[35 + jumpipofs], NULL, 16);
			/* the 'signature' (cs:ip) of where the loop started */
			char startsig[10];
			sprintf(startsig, "%04x:%04x", loopcs, loopstartip);

			int looplen = 1;
			int bonus = 0;
			for ( ; looplen < MAX_LOOP_LEN && looplen < outlineno; looplen++) {
				/* on our way back through the sliding sig list, retrieve the
				 * uncontracted length of any previously compacted loops
				 */
				if (!strncmp(loopsig, sig[(outlineno - looplen) % MAX_LOOP_LEN], 4)) {
					int len = 0, mult = 0;
					/* only trust the values if both were actually parsed */
					if (sscanf(sig[(outlineno - looplen) % MAX_LOOP_LEN], "Loop of last %d lines repeats a further %d times", &len, &mult) == 2 && mult)
						bonus += len * mult - 1;
				}
				/* when we find the loop start, stop */
				if (!strncmp(startsig, sig[(outlineno - looplen) % MAX_LOOP_LEN], 9))
					break;
			}

			/* we don't bail out on these two cases, as loops to places we haven't yet visited do occur,
			 * so just ignore this loop instr
			 */
			/* didn't find start of loop and searched through complete history
			 * (>= also covers a loop instr on the very first line, where the
			 * search above never runs)
			 */
			if (looplen >= outlineno)
				continue;
			if (looplen == MAX_LOOP_LEN) { /* fail */
				printf("Didn't find start of loop at line %ld in history -- try increasing MAX_LOOP_LEN?\n", inlineno);
				continue;
			}

//fprintf(outf, "loop sig %s, startsig %s, found %s, looplen %d, bonus %d\n", sig[outlineno % MAX_LOOP_LEN], startsig, sig[(outlineno - looplen - 4) % MAX_LOOP_LEN], looplen, bonus);
			/* Assume loop instr adds 4 lines */
			looplen += 4;

			fpos_t loopendpos;
			int loopcount = 0;
			/* slurp until the loop terminates */
			while (!loop_concluded(tracef, loopendip)) {
				fgetpos(tracef, &loopendpos);
				/* we know the loop length from finding the signature, so move
				 * forward the same length plus the length of any previous
				 * contractions (as stuff further into the file will not be
				 * contracted) and see if we're at another loop instruction
				 */
				for (int i = 0; i < looplen + bonus; i++) {
					if (!fgets(line, sizeof line, tracef)) {	// eof
						clearerr(tracef);
						goto dissimilar;
					}
				}
				/* if we are, treat this loop as identical, and carry on */
				if (!strncmp(line, sig[outlineno % MAX_LOOP_LEN], 9)) {
					inlineno += looplen + bonus;
					loopcount++;
					continue;
				}
dissimilar:
				/* if we're not, this loop is not identical, and cannot be compacted */
				fsetpos(tracef, &loopendpos);
//fprintf(outf, "loop changed size -- looking for %s, got %s\n", sig[outlineno % MAX_LOOP_LEN], line);
				break;
			}
			/* if we found one or more identical length loops, replace them in the
			 * output with a contraction message. Also add this message to the
			 * sliding signature log, so that it can be counted in, if this loop
			 * is inside another loop
			 */
			if (loopcount) {
				char loopstr[100];
				snprintf(loopstr, sizeof loopstr, "Loop of last %d lines repeats a further %d times", looplen, loopcount);
				fprintf(outf, "\n%s\n\n\n", loopstr);
				outlineno += 1;
				/* loopstr is always terminated by snprintf and fits in SIG_LEN */
				strncpy(sig[outlineno % MAX_LOOP_LEN], loopstr, sizeof loopstr);
			}
next:
			continue;
		}
	}

	free(sig);
	fclose(tracef);

	/* a failed write or flush would otherwise go unnoticed */
	bool writefailed = ferror(outf) != 0;
	if (fclose(outf) != 0)
		writefailed = true;
	if (writefailed) {
		printf("Writing output file failed\n");
		return EXIT_FAILURE;
	}

	return 0;
}