source: trunk/src/org/apollo/meldex/Transcriber.java@ 1006

Last change on this file since 1006 was 315, checked in by bjn8, 16 years ago

Apollo spin-off added

File size: 11.2 KB
Line 
1package org.apollo.meldex;
2
3import java.io.IOException;
4import java.util.ArrayList;
5
6@SuppressWarnings("unchecked") // code in java 1.4
7public class Transcriber
8{
9 // The values of these constants are taken directly from Rodger's code
10 final double RMS_WINDOW = 10.0;
11 final double RMS_STEP = (RMS_WINDOW / 2);
12
13 // The sample rate (frequency) of the loaded sample
14 int sampleRate;
15
16 // Create instances of our classes
17 PitchTracker pitchTracker = new PitchTracker();
18 NoteRounder noteRounder = new NoteRounder();
19
20
21 /**
22 *
23 * @param sample
24 *
25 * @return
26 * The wave sample converted into a RogTrack structure if successful.
27 * Null if unsuccessful.
28 *
29 * @throws IOException
30 * If conversion to standardized form fails
31 */
32 public RogTrack transcribeSample(WavSample sample) throws IOException
33 {
34 // This shouldn't have to be fixed...
35 sampleRate = 22050;
36 // sampleRate = (int)SampledAudioManager.PLAYBACK_SAMPLE_RATE;
37
38 // These are values that should end up being parameters
39 int tempoBPM = 120;
40 int minRestLength = 4, minNoteLength = 2;
41 boolean addRestLeftovers = true;
42
43 // Check that the sample is valid
44 if (sample == null || sample.getFormat() == null || sample.getRawAudio() == null) {
45 return null;
46 }
47
48 // Get the standardised unsigned 8-bit mono data
49 //byte[] stdData = sample.getStandardisedData(true); // THis is bad of memory - was getting lots heap errors
50// AudioFormat standardizedFormat = new AudioFormat(
51// //sample.getFormat().getSampleRate(),
52// sampleRate,
53// //sample.getFormat().getSampleRate(),
54// 8, // 8-bit
55// 1, // mono
56// false, // unsigned
57// true); // Big endian
58//
59// byte[] stdData = AudioIO.convertAudioBytes(
60// sample.getRawAudio(),
61// sample.getFormat(),
62// standardizedFormat);
63
64 byte[] stdData = MeldexConversion.toStandardizedFormat(
65 sample.getRawAudio(), sample.getFormat(), sampleRate);
66
67 if (stdData == null) {
68 return null;
69 }
70
71 // Create a new RogTrack to store the result of the transcription
72 RogTrack track = new RogTrack();
73
74 // We don't know the time signature
75 track.addTimeSignature(0, 0);
76
77 // Calculate the Root Mean Squared data for the loaded sample
78 float[] rmsData = calculateRMS(stdData, stdData.length);
79
80 // Check that we have some data to process
81 if (rmsData.length <= 0) {
82 return null;
83 }
84
85
86 // int rmsStep = 111; // !! PURE-ROG!!
87 int rmsStep = (sampleRate / 200);
88 //int rmsStep = (sampleRate / 1000);
89
90
91 // Calculate the number of RMS steps per sixteenth note
92 int rmsStepsPer16th = (int) ((60F / (float) (tempoBPM * 4F)) / (RMS_STEP / 1000F));
93
94 // Calculate the sum of the squared RMS data
95 double sumRMSDataSquared = 0;
96 for (int rmsPos = 0; rmsPos < rmsData.length; rmsPos++) {
97 sumRMSDataSquared += (rmsData[rmsPos] * rmsData[rmsPos]);
98 }
99
100 // Calculate the upper and lower thresholds (asymmetrical??)
101 double threshold = Math.sqrt(sumRMSDataSquared / rmsData.length);
102 float upperThreshold = (float) (threshold * 0.55);
103 float lowerThreshold = (float) (threshold * 0.35);
104
105 // Now we find all of the notes in the sample...
106 int lastCents = -1, lastStart = 0, lastEnd = 0;
107 int restLen, noteLen;
108 for (int rmsPos = 0; rmsPos < rmsData.length; rmsPos++) {
109
110 // Have we found the start of a note??
111 if (rmsData[rmsPos] >= upperThreshold) {
112
113 // Yes, so find the end of the note
114 int noteStart = rmsPos;
115 while (rmsPos < rmsData.length) {
116 if (rmsData[rmsPos] <= lowerThreshold) {
117 break;
118 }
119 rmsPos++;
120 }
121
122 int noteEnd = rmsPos;
123 // If it is longer than 1/3 of the minimum note length, process it
124 if ((noteEnd - noteStart) >= ((minNoteLength * rmsStepsPer16th) / 3)) {
125
126 // Create a new array for the interesting data
127 int noteLength = (noteEnd - noteStart) * rmsStep;
128 byte[] noteData = new byte[noteLength];
129 for (int notePos = 0; notePos < noteLength; notePos++) {
130 noteData[notePos] = stdData[(noteStart * rmsStep) + notePos];
131 }
132
133 // Process the note
134 int cents = processNote(noteData, noteLength);
135 if (cents > 1000) {
136
137 // If it is the first note then initialise the NoteRounder
138 if (lastCents < 0) {
139 int noteRound = (noteRounder.roundCents(cents) - cents);
140 noteRounder.setInitialValue(noteRound);
141 }
142 // Otherwise we calculate the note and rest values
143 else {
144 int finalNote = noteRounder.roundNote(lastCents, true);
145
146 // Calculate the rest length
147 int rmsRestLeft = 0;
148 if (rmsStepsPer16th <= 0) {
149 restLen = 0;
150 }
151 else {
152 int rmsRestLen = (noteStart - lastEnd);
153 int mult = (minRestLength * rmsStepsPer16th);
154 int roundRest = noteRounder.round(rmsRestLen, mult);
155 restLen = (roundRest / rmsStepsPer16th);
156 if (addRestLeftovers == true) {
157 rmsRestLeft = rmsRestLen - (restLen * rmsStepsPer16th);
158 }
159 }
160
161 // Calculate the note length
162 if (rmsStepsPer16th <= 0) {
163 noteLen = 0;
164 }
165 else {
166 int rmsNoteLen = (lastEnd - lastStart);
167 int mult = (minNoteLength * rmsStepsPer16th);
168 if (addRestLeftovers == true && rmsRestLeft > 0) {
169 rmsNoteLen += rmsRestLeft;
170 }
171 int roundNote = noteRounder.round(rmsNoteLen, mult);
172 noteLen = (roundNote / rmsStepsPer16th);
173 }
174
175 // Add the notes and rests to the track
176 track.addNote(noteLen, (finalNote / 100), 0);
177 track.addRest(restLen);
178 }
179
180 lastCents = cents;
181 lastStart = noteStart;
182 lastEnd = noteEnd;
183 }
184 }
185 }
186 }
187
188 // Deal with the last note (if there is one)
189 if (lastCents > 0) {
190 int finalNote = noteRounder.roundNote(lastCents, true);
191
192 // Calculate the note length
193 if (rmsStepsPer16th <= 0) {
194 noteLen = 0;
195 }
196 else {
197 int rmsNoteLen = (lastEnd - lastStart);
198 int mult = (minNoteLength * rmsStepsPer16th);
199 int roundNote = noteRounder.round(rmsNoteLen, mult);
200 noteLen = (roundNote / rmsStepsPer16th);
201
202 // Add the last note to the track
203 track.addNote(noteLen, (finalNote / 100), 0);
204 }
205 }
206
207 // Calculate the key signature
208 int key = track.calculateBestKey();
209
210 // Add a key signature event to the track
211 track.addKeySignature(key, 0);
212
213 // Set the note names for the new key signature
214 track.setNoteNames(key);
215
216 // Return the Track with the transcribed data
217 return track;
218 }
219
220
221 private float[] calculateRMS(byte[] stdData, int stdLength)
222 {
223 int accMS = 0;
224 int accMSPrev = 0;
225 int frameStart = 0;
226
227 // int rmsStep = 111; // !! PURE ROG!!
228 int rmsStep = (22050 / 200); // !! SEMI-PURE ROG !!
229
230 // Allocate memory for the RMS data values
231 int rmsLength = (int) (stdLength / (float) rmsStep);
232 float[] rmsData = new float[rmsLength];
233
234 for (int stdPos = 0, rmsPos = 0; stdPos < stdLength; stdPos++) {
235 // Calculate the accumulated Mean Squared value
236 int value = (stdData[stdPos] & 255) - 127; // !! SHOULD 127 BE 128?? !!
237 accMS += (value * value);
238
239 // If we have finished a frame calculate a value for the output
240 if ((stdPos - frameStart) >= rmsStep) {
241 double result = Math.sqrt((double)(accMS+accMSPrev) / (double)(rmsStep*2));
242 frameStart = stdPos;
243 accMSPrev = accMS;
244 accMS = 0;
245 rmsData[rmsPos] = (float) result;
246 rmsPos++;
247 }
248 }
249
250 return rmsData;
251 }
252
253
254 private int processNote(byte[] data, int length)
255 {
256 // Pitch track the note
257 ArrayList pitchData = pitchTracker.process(data, length, sampleRate);
258
259 // Average the pitch data
260 int pitchLength = averagePitchData(pitchData);
261
262 // Calculate the histogram of the pitch data
263 return calculateHistogram(pitchData, pitchLength);
264 }
265
266
267 private int averagePitchData(ArrayList pitchData)
268 {
269 // Loop through the pitch values...
270 int i = 0, k = 0;
271 while (i < (pitchData.size() - 1)) {
272 int startPos = ((PitchValue) pitchData.get(i)).position;
273 double period = ((PitchValue) pitchData.get(i)).period;
274 double averagePeriod = period;
275 double runningPeriod = ((PitchValue) pitchData.get(i+1)).position - startPos;
276 double numPeriods = runningPeriod / period;
277
278 // Start from the next pitch
279 int j = 1;
280 while ((i+j) < (pitchData.size() - 1)) {
281 // Get the next pitch estimate
282 period = ((PitchValue) pitchData.get(i+j)).period;
283 int position = ((PitchValue) pitchData.get(i+j)).position;
284
285 // Make sure that this period is covered by the average so far
286 if (period > (averagePeriod * 1.1) || period < (averagePeriod * 0.909)) {
287 break;
288 }
289
290 // Stop if we have covered more than 20 msec
291 int nextPos = ((PitchValue) pitchData.get(i+j+1)).position;
292 // if ((nextPos - startPos) >= 445) { // !! PURE-ROG !!
293 if ((nextPos - startPos) >= (sampleRate * 0.02)) {
294 break;
295 }
296
297 // Add to this period
298 runningPeriod += (nextPos - position);
299 numPeriods += ((nextPos - position) / period);
300 averagePeriod = runningPeriod / numPeriods;
301 j++;
302 }
303
304 // Write over the original pitch value with the averaged pitch value
305 pitchData.set(k, new PitchValue(averagePeriod, startPos));
306 k++;
307
308 // Increment i
309 i += j;
310 }
311
312 // Return the number of averaged pitch values
313 return k;
314 }
315
316
317 private int calculateHistogram(ArrayList data, int length)
318 {
319 // This probably shouldn't be a constant
320 final int histLength = 960;
321
322 // Allocate memory for the histogram data and initialise it to zero
323 int[] histData = new int[histLength];
324 for (int i = 0; i < histLength; i++) {
325 histData[i] = 0;
326 }
327
328 // Calculate the histogram data
329 for (int i = 0; i < (length - 1); i++) {
330 double period = ((PitchValue) data.get(i)).period;
331 double position = ((PitchValue) data.get(i)).position;
332 double periodLength = ((PitchValue) data.get(i+1)).position - position;
333
334 // We only need to do this if the pitch does not equal zero
335 if (period > 0) {
336 // Do some weird stuff...
337 double logValue = (Math.log(period / sampleRate)) / (Math.log(10));
338 double cents = -(logValue / 0.000250858) - 3637.622659;
339 int histPos = (int) (Math.floor((cents - 3600.0) / 5.0));
340 if (histPos >= 0 && histPos < histLength) {
341 histData[histPos] += periodLength;
342 }
343 }
344 }
345
346 // Select the cents value using the histogram data
347 return selectCents(histData, histLength);
348 }
349
350
351 private int selectCents(int[] histData, int histLength)
352 {
353 // Find the maximum average histogram value
354 int maxHistValue = 0, maxHistPos = 0;
355 for (int i = 0; i < (histLength - 20); i++) {
356 // Sum the next 20 histogram values (1 semitone)
357 int histValue = 0;
358 for (int j = 0; j < 20; j++) {
359 histValue += histData[i+j];
360 }
361
362 // Maintain the maximum histogram value
363 if (histValue > maxHistValue) {
364 maxHistValue = histValue;
365 maxHistPos = i;
366 }
367 }
368
369 // Find the average cents value of the maximum histogram
370 int sumCents = 0;
371 for (int j = 0; j < 20; j++) {
372 sumCents += (histData[maxHistPos+j] * (((maxHistPos+j) * 5) + 3602));
373 }
374
375 // Calculate sum of (cents * time) divided by total time
376 if (maxHistValue > 0) {
377 return (sumCents / maxHistValue);
378 }
379 else {
380 return 0;
381 }
382 }
383}
Note: See TracBrowser for help on using the repository browser.