source: trunk/src/org/apollo/meldex/Transcriber.java

Last change on this file was 1102, checked in by davidb, 6 years ago

Reworking of the code base to separate logic from graphics. This version of Expeditee now supports JFX graphics as an alternative to Swing.

File size: 11.0 KB
Line 
1package org.apollo.meldex;
2
3import java.io.IOException;
4import java.util.ArrayList;
5
6public class Transcriber
7{
8 // The values of these constants are taken directly from Rodger's code
9 final double RMS_WINDOW = 10.0;
10 final double RMS_STEP = (RMS_WINDOW / 2);
11
12 // The sample rate (frequency) of the loaded sample
13 int sampleRate;
14
15 // Create instances of our classes
16 PitchTracker pitchTracker = new PitchTracker();
17 NoteRounder noteRounder = new NoteRounder();
18
19
20 /**
21 *
22 * @param sample
23 *
24 * @return
25 * The wave sample converted into a RogTrack structure if successful.
26 * Null if unsuccessful.
27 *
28 * @throws IOException
29 * If conversion to standardized form fails
30 */
31 public RogTrack transcribeSample(WavSample sample) throws IOException
32 {
33 // This shouldn't have to be fixed...
34 sampleRate = 22050;
35 // sampleRate = (int)SampledAudioManager.PLAYBACK_SAMPLE_RATE;
36
37 // These are values that should end up being parameters
38 int tempoBPM = 120;
39 int minRestLength = 4, minNoteLength = 2;
40 boolean addRestLeftovers = true;
41
42 // Check that the sample is valid
43 if (sample == null || sample.getFormat() == null || sample.getRawAudio() == null) {
44 return null;
45 }
46
47 // Get the standardised unsigned 8-bit mono data
48 //byte[] stdData = sample.getStandardisedData(true); // THis is bad of memory - was getting lots heap errors
49// AudioFormat standardizedFormat = new AudioFormat(
50// //sample.getFormat().getSampleRate(),
51// sampleRate,
52// //sample.getFormat().getSampleRate(),
53// 8, // 8-bit
54// 1, // mono
55// false, // unsigned
56// true); // Big endian
57//
58// byte[] stdData = AudioIO.convertAudioBytes(
59// sample.getRawAudio(),
60// sample.getFormat(),
61// standardizedFormat);
62
63 byte[] stdData = MeldexConversion.toStandardizedFormat(
64 sample.getRawAudio(), sample.getFormat(), sampleRate);
65
66 if (stdData == null) {
67 return null;
68 }
69
70 // Create a new RogTrack to store the result of the transcription
71 RogTrack track = new RogTrack();
72
73 // We don't know the time signature
74 track.addTimeSignature(0, 0);
75
76 // Calculate the Root Mean Squared data for the loaded sample
77 float[] rmsData = calculateRMS(stdData, stdData.length);
78
79 // Check that we have some data to process
80 if (rmsData.length <= 0) {
81 return null;
82 }
83
84
85 // int rmsStep = 111; // !! PURE-ROG!!
86 int rmsStep = (sampleRate / 200);
87 //int rmsStep = (sampleRate / 1000);
88
89
90 // Calculate the number of RMS steps per sixteenth note
91 int rmsStepsPer16th = (int) ((60F / (float) (tempoBPM * 4F)) / (RMS_STEP / 1000F));
92
93 // Calculate the sum of the squared RMS data
94 double sumRMSDataSquared = 0;
95 for (int rmsPos = 0; rmsPos < rmsData.length; rmsPos++) {
96 sumRMSDataSquared += (rmsData[rmsPos] * rmsData[rmsPos]);
97 }
98
99 // Calculate the upper and lower thresholds (asymmetrical??)
100 double threshold = Math.sqrt(sumRMSDataSquared / rmsData.length);
101 float upperThreshold = (float) (threshold * 0.55);
102 float lowerThreshold = (float) (threshold * 0.35);
103
104 // Now we find all of the notes in the sample...
105 int lastCents = -1, lastStart = 0, lastEnd = 0;
106 int restLen, noteLen;
107 for (int rmsPos = 0; rmsPos < rmsData.length; rmsPos++) {
108
109 // Have we found the start of a note??
110 if (rmsData[rmsPos] >= upperThreshold) {
111
112 // Yes, so find the end of the note
113 int noteStart = rmsPos;
114 while (rmsPos < rmsData.length) {
115 if (rmsData[rmsPos] <= lowerThreshold) {
116 break;
117 }
118 rmsPos++;
119 }
120
121 int noteEnd = rmsPos;
122 // If it is longer than 1/3 of the minimum note length, process it
123 if ((noteEnd - noteStart) >= ((minNoteLength * rmsStepsPer16th) / 3)) {
124
125 // Create a new array for the interesting data
126 int noteLength = (noteEnd - noteStart) * rmsStep;
127 byte[] noteData = new byte[noteLength];
128 for (int notePos = 0; notePos < noteLength; notePos++) {
129 noteData[notePos] = stdData[(noteStart * rmsStep) + notePos];
130 }
131
132 // Process the note
133 int cents = processNote(noteData, noteLength);
134 if (cents > 1000) {
135
136 // If it is the first note then initialise the NoteRounder
137 if (lastCents < 0) {
138 int noteRound = (noteRounder.roundCents(cents) - cents);
139 noteRounder.setInitialValue(noteRound);
140 }
141 // Otherwise we calculate the note and rest values
142 else {
143 int finalNote = noteRounder.roundNote(lastCents, true);
144
145 // Calculate the rest length
146 int rmsRestLeft = 0;
147 if (rmsStepsPer16th <= 0) {
148 restLen = 0;
149 }
150 else {
151 int rmsRestLen = (noteStart - lastEnd);
152 int mult = (minRestLength * rmsStepsPer16th);
153 int roundRest = noteRounder.round(rmsRestLen, mult);
154 restLen = (roundRest / rmsStepsPer16th);
155 if (addRestLeftovers == true) {
156 rmsRestLeft = rmsRestLen - (restLen * rmsStepsPer16th);
157 }
158 }
159
160 // Calculate the note length
161 if (rmsStepsPer16th <= 0) {
162 noteLen = 0;
163 }
164 else {
165 int rmsNoteLen = (lastEnd - lastStart);
166 int mult = (minNoteLength * rmsStepsPer16th);
167 if (addRestLeftovers == true && rmsRestLeft > 0) {
168 rmsNoteLen += rmsRestLeft;
169 }
170 int roundNote = noteRounder.round(rmsNoteLen, mult);
171 noteLen = (roundNote / rmsStepsPer16th);
172 }
173
174 // Add the notes and rests to the track
175 track.addNote(noteLen, (finalNote / 100), 0);
176 track.addRest(restLen);
177 }
178
179 lastCents = cents;
180 lastStart = noteStart;
181 lastEnd = noteEnd;
182 }
183 }
184 }
185 }
186
187 // Deal with the last note (if there is one)
188 if (lastCents > 0) {
189 int finalNote = noteRounder.roundNote(lastCents, true);
190
191 // Calculate the note length
192 if (rmsStepsPer16th <= 0) {
193 noteLen = 0;
194 }
195 else {
196 int rmsNoteLen = (lastEnd - lastStart);
197 int mult = (minNoteLength * rmsStepsPer16th);
198 int roundNote = noteRounder.round(rmsNoteLen, mult);
199 noteLen = (roundNote / rmsStepsPer16th);
200
201 // Add the last note to the track
202 track.addNote(noteLen, (finalNote / 100), 0);
203 }
204 }
205
206 // Calculate the key signature
207 int key = track.calculateBestKey();
208
209 // Add a key signature event to the track
210 track.addKeySignature(key, 0);
211
212 // Set the note names for the new key signature
213 track.setNoteNames(key);
214
215 // Return the Track with the transcribed data
216 return track;
217 }
218
219
220 private float[] calculateRMS(byte[] stdData, int stdLength)
221 {
222 int accMS = 0;
223 int accMSPrev = 0;
224 int frameStart = 0;
225
226 // int rmsStep = 111; // !! PURE ROG!!
227 int rmsStep = (22050 / 200); // !! SEMI-PURE ROG !!
228
229 // Allocate memory for the RMS data values
230 int rmsLength = (int) (stdLength / (float) rmsStep);
231 float[] rmsData = new float[rmsLength];
232
233 for (int stdPos = 0, rmsPos = 0; stdPos < stdLength; stdPos++) {
234 // Calculate the accumulated Mean Squared value
235 int value = (stdData[stdPos] & 255) - 127; // !! SHOULD 127 BE 128?? !!
236 accMS += (value * value);
237
238 // If we have finished a frame calculate a value for the output
239 if ((stdPos - frameStart) >= rmsStep) {
240 double result = Math.sqrt((double)(accMS+accMSPrev) / (double)(rmsStep*2));
241 frameStart = stdPos;
242 accMSPrev = accMS;
243 accMS = 0;
244 rmsData[rmsPos] = (float) result;
245 rmsPos++;
246 }
247 }
248
249 return rmsData;
250 }
251
252
253 private int processNote(byte[] data, int length)
254 {
255 // Pitch track the note
256 ArrayList<PitchValue> pitchData = pitchTracker.process(data, length, sampleRate);
257
258 // Average the pitch data
259 int pitchLength = averagePitchData(pitchData);
260
261 // Calculate the histogram of the pitch data
262 return calculateHistogram(pitchData, pitchLength);
263 }
264
265
266 private int averagePitchData(ArrayList<PitchValue> pitchData)
267 {
268 // Loop through the pitch values...
269 int i = 0, k = 0;
270 while (i < (pitchData.size() - 1)) {
271 int startPos = pitchData.get(i).position;
272 double period = pitchData.get(i).period;
273 double averagePeriod = period;
274 double runningPeriod = pitchData.get(i+1).position - startPos;
275 double numPeriods = runningPeriod / period;
276
277 // Start from the next pitch
278 int j = 1;
279 while ((i+j) < (pitchData.size() - 1)) {
280 // Get the next pitch estimate
281 period = pitchData.get(i+j).period;
282 int position = pitchData.get(i+j).position;
283
284 // Make sure that this period is covered by the average so far
285 if (period > (averagePeriod * 1.1) || period < (averagePeriod * 0.909)) {
286 break;
287 }
288
289 // Stop if we have covered more than 20 msec
290 int nextPos = pitchData.get(i+j+1).position;
291 // if ((nextPos - startPos) >= 445) { // !! PURE-ROG !!
292 if ((nextPos - startPos) >= (sampleRate * 0.02)) {
293 break;
294 }
295
296 // Add to this period
297 runningPeriod += (nextPos - position);
298 numPeriods += ((nextPos - position) / period);
299 averagePeriod = runningPeriod / numPeriods;
300 j++;
301 }
302
303 // Write over the original pitch value with the averaged pitch value
304 pitchData.set(k, new PitchValue(averagePeriod, startPos));
305 k++;
306
307 // Increment i
308 i += j;
309 }
310
311 // Return the number of averaged pitch values
312 return k;
313 }
314
315
316 private int calculateHistogram(ArrayList<PitchValue> data, int length)
317 {
318 // This probably shouldn't be a constant
319 final int histLength = 960;
320
321 // Allocate memory for the histogram data and initialise it to zero
322 int[] histData = new int[histLength];
323 for (int i = 0; i < histLength; i++) {
324 histData[i] = 0;
325 }
326
327 // Calculate the histogram data
328 for (int i = 0; i < (length - 1); i++) {
329 double period = data.get(i).period;
330 double position = data.get(i).position;
331 double periodLength = data.get(i+1).position - position;
332
333 // We only need to do this if the pitch does not equal zero
334 if (period > 0) {
335 // Do some weird stuff...
336 double logValue = (Math.log(period / sampleRate)) / (Math.log(10));
337 double cents = -(logValue / 0.000250858) - 3637.622659;
338 int histPos = (int) (Math.floor((cents - 3600.0) / 5.0));
339 if (histPos >= 0 && histPos < histLength) {
340 histData[histPos] += periodLength;
341 }
342 }
343 }
344
345 // Select the cents value using the histogram data
346 return selectCents(histData, histLength);
347 }
348
349
350 private int selectCents(int[] histData, int histLength)
351 {
352 // Find the maximum average histogram value
353 int maxHistValue = 0, maxHistPos = 0;
354 for (int i = 0; i < (histLength - 20); i++) {
355 // Sum the next 20 histogram values (1 semitone)
356 int histValue = 0;
357 for (int j = 0; j < 20; j++) {
358 histValue += histData[i+j];
359 }
360
361 // Maintain the maximum histogram value
362 if (histValue > maxHistValue) {
363 maxHistValue = histValue;
364 maxHistPos = i;
365 }
366 }
367
368 // Find the average cents value of the maximum histogram
369 int sumCents = 0;
370 for (int j = 0; j < 20; j++) {
371 sumCents += (histData[maxHistPos+j] * (((maxHistPos+j) * 5) + 3602));
372 }
373
374 // Calculate sum of (cents * time) divided by total time
375 if (maxHistValue > 0) {
376 return (sumCents / maxHistValue);
377 }
378 else {
379 return 0;
380 }
381 }
382}
Note: See TracBrowser for help on using the repository browser.