/** * A component of a library for * MoSeS. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ package uk.ac.leeds.ccg.andyt.projects.moses.io; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintWriter; import java.io.RandomAccessFile; import java.io.StreamTokenizer; import java.util.HashMap; import java.util.Random; import java.util.TreeMap; import uk.ac.leeds.ccg.andyt.projects.moses.utilities.ErrorAndExceptionHandler; import uk.ac.leeds.ccg.andyt.projects.moses.utilities.StaticIO; /** * A class for handling an individual * CASKS013DataRecord and collections of * CASKS013DataRecords. * * * @author Andy * Turner * @version 1.0.0, 2006-08-10 * @see AbstractCASDataRecord */ public class CASKS013DataHandler extends AbstractCASDataHandler { /** * Creates a new instance of CASKS013DataHandler for handling * CASKS013DataRecords stored in a formatted File The default * File is hard coded. To specify the File use * CASKS013DataHandler(File). To set a different default * File edit the source code and recompile. */ public CASKS013DataHandler() throws IOException { // this( new File( // "C:/Work/Projects/MoSeS/Workspace/CASKS013DataRecords.dat" ) ); // Want also to setDirectory(); // initMemoryReserve(); // Default this.directory, this.file, this.randomAccessFile File directory = new File("C:/Work/Projects/MoSeS/Workspace/"); this.init(directory); this._File = new File(directory, "CASKS013DataRecords.dat"); if (!this._File.exists()) { this._File.createNewFile(); } this._RecordLength = new CASKS013DataRecord().getSizeInBytes(); // System.out.println("this.recordLength " + this.recordLength); this._RandomAccessFile = new RandomAccessFile(this._File, "r"); } /** * Creates a new instance of CASKS013DataHandler with Records loaded from * formattedFile. * * @param formattedFile * Formatted file of CASKS013DataRecords */ public CASKS013DataHandler(File formattedFile) throws IOException { // initMemoryReserve(); this.init(formattedFile.getParentFile()); this._RecordLength = new CASKS013DataRecord().getSizeInBytes(); load(formattedFile); System.out.println("CASKS013DataRecords loaded successfully"); } /** * @param args * the command line arguments No arguments are used. */ public static void main(String[] args) throws IOException { CASKS013DataHandler aCASKS013DataHandler = new CASKS013DataHandler(); aCASKS013DataHandler.run(); } /** * Top level run method */ private void run() throws IOException { // run1( true, 20 ); runAggregate(); run2(20); } /** * Aggregates CASKS013DataRecords */ private void runAggregate() throws IOException { // Aggregate to MSOA for Leeds long nDataRecords = this.getNDataRecords(); long nrecordsInLeeds = 2439L; long startRecordIDForLeeds = 56749L; long startRecordID = startRecordIDForLeeds; long endRecordID = startRecordIDForLeeds + nrecordsInLeeds; File _File = new File( "C:/Work/Projects/MoSeS/Workspace/Leeds/CASKS013DataRecordsMSOA.dat"); _File.createNewFile(); RandomAccessFile _RandomAccessFile = new RandomAccessFile(_File, "rw"); aggregateOAToMSOA(_RandomAccessFile, startRecordID, endRecordID); _RandomAccessFile.close(); CASKS013DataHandler _CASKS013DataHandler = new CASKS013DataHandler( _File); nDataRecords = _CASKS013DataHandler.getNDataRecords(); System.out.println("nDataRecords " + nDataRecords); CASKS013DataRecord _CASKS013DataRecord = new CASKS013DataRecord(); _File = new File( "C:/Work/Projects/MoSeS/Workspace/Leeds/CASKS013DataRecordsMSOA.csv"); PrintWriter _PrintWriter = new PrintWriter(_File); _PrintWriter.println(_CASKS013DataRecord.toCSVStringFields()); for (long along = 0L; along < nDataRecords; along++) { _CASKS013DataRecord = _CASKS013DataHandler.getCASKS013DataRecord(along); _PrintWriter.println(_CASKS013DataRecord.toCSVString()); System.out.println(_CASKS013DataRecord.toString()); } _PrintWriter.flush(); _PrintWriter.close(); } /** * Loads CASKS013DataRecords and prints out n randomly * * @param loadFromSource * If true, data is loaded from hardcoded source files as * downloaded from casweb. Otherwise, data is loaded from * this.formattedFile * @param n * The number of loaded data records to print out. */ private void run2(int n) throws IOException { File file = new File( "C:/Work/Projects/MoSeS/Workspace/Leeds/CASKS013DataRecordsMSOA.dat"); load(file); print(n, new Random()); } /** * Loads CAS001DataRecords and prints out n randomly * @param directory to load source data from * @param n the number of loaded data records to print out. */ protected void formatSourceData( File directory, int n) throws IOException { _RandomAccessFile = new RandomAccessFile(this._File, "rw"); // Load from source File infile; long long0 = 0L; long RecordID = 0L; // Load England infile = new File( directory, "KS013EnglandOA.csv"); RecordID = format(infile, RecordID, "England"); System.out.println(infile.toString() + " formatted successfully " + RecordID + " records"); // 165665 long0 = RecordID; // Load Wales infile = new File( directory, "KS013WalesOA.csv"); RecordID = format(infile, RecordID, "Wales"); System.out.println(infile.toString() + " formatted successfully " + (RecordID - long0) + " records"); // 9769 long0 = RecordID; // Load Scotland infile = new File( directory, "/KS013ScotlandOA.csv"); RecordID = format(infile, RecordID, "Scotland"); System.out.println(infile.toString() + " formatted successfully " + (RecordID - long0) + " records"); // 42604 long0 = RecordID; // Load Northern Ireland infile = new File( directory, "KS013NorthernIrelandOA.csv"); RecordID = format(infile, RecordID, "Northern Ireland"); System.out.println(infile.toString() + " formatted successfully " + (RecordID - long0) + " records"); // 5022 _RandomAccessFile.close(); print(20, new Random()); } /** * Uses a BufferedReader and a StreamTokenizer to * read lines from the sourceFile File. The lines are converted * to CASKS013DataRecords and written to * this.tRandomAccessFile. * * @param sourceFile * The source CASKS013DataRecords file to be formatted and * written to this.tRandomAccessFile. * @param RecordID * The RecordID to assign to the first * CASKS013DataRecord. * @param country * Identifies type of table * * @return The RecordID assigned to the last * CASKS013DataRecords. */ protected long format(File sourceFile, long RecordID, String country) throws IOException { System.out.println("format( File( " + sourceFile.toString() + " ), RecordID( " + RecordID + " ))"); BufferedReader aBufferedReader = new BufferedReader( new InputStreamReader(new FileInputStream(sourceFile))); StreamTokenizer aStreamTokenizer = new StreamTokenizer(aBufferedReader); StaticIO.setStreamTokenizerSyntax1(aStreamTokenizer); String string0 = new String(); String string1; String string2; long long0; long longZero = 0L; CASKS013DataRecord aCASKS013DataRecord = new CASKS013DataRecord(); boolean print = false; int int10000 = 10000; // Skip the first line int tokenType = aStreamTokenizer.nextToken(); while (tokenType != StreamTokenizer.TT_EOL) { tokenType = aStreamTokenizer.nextToken(); } tokenType = aStreamTokenizer.nextToken(); while (tokenType != StreamTokenizer.TT_EOF) { switch (tokenType) { case StreamTokenizer.TT_EOL: long0 = RecordID % int10000; print = (long0 == longZero); if (print) { string2 = aCASKS013DataRecord.toString(); System.out.println(string2); string2 = string0; } // Write out aCASKS013DataRecord.write(_RandomAccessFile); RecordID++; break; case StreamTokenizer.TT_WORD: string1 = aStreamTokenizer.sval; aCASKS013DataRecord = new CASKS013DataRecord(RecordID, string1, country); break; } string1 = string0; tokenType = aStreamTokenizer.nextToken(); } System.out.println("Number of Records loaded = " + RecordID); return RecordID; } /** * @return a CASKS013DataRecord with * AbstractCASDataRecord.RecordID = RecordID * @param RecordID * The RecordID of the CASKS013DataRecord to be returned. */ public AbstractCASDataRecord getDataRecord(long RecordID) { return getCASKS013DataRecord(RecordID); } /** * @return a CASKS013DataRecord with * CASKS013DataRecord.RecordID = RecordID * @param RecordID * The RecordID of the CASKS013DataRecord to be returned. */ public CASKS013DataRecord getCASKS013DataRecord(long RecordID) { CASKS013DataRecord result = null; try { this._RandomAccessFile.seek(_RecordLength * RecordID); result = new CASKS013DataRecord(this._RandomAccessFile); } catch (IOException aIOException) { System.err.println(aIOException.getLocalizedMessage()); System.exit(ErrorAndExceptionHandler.IOException); } return result; } /** * Aggregates CASKS013DataRecords from OA To Ward for the OA * records in the range [startRecordID,endRecordID] and writes the results * to aRandomAccessFile * * @param aRandomAccessFile * RandomAccessFile to which results are written * @param startRecordID * The first OA RecordID in the sequence to be aggregated. * @param endRecordID * The last OA RecordID in the sequence to be aggregated. */ public void aggregateOAToWard(RandomAccessFile aRandomAccessFile, long startRecordID, long endRecordID) throws IOException { TreeMap result = new TreeMap(); CASKS013DataRecord aCASKS013DataRecord; CASKS013DataRecord bCASKS013DataRecord; String zoneCode; Object zoneCodeWard; // long newRecordID = startRecordIDForLeeds - 1L; long newRecordID = -1L; for (long RecordID = startRecordID; RecordID < endRecordID; RecordID++) { aCASKS013DataRecord = (CASKS013DataRecord) getDataRecord(RecordID); zoneCode = new String(aCASKS013DataRecord.getZone_Code()); zoneCodeWard = zoneCode.substring(0, 6); if (result.containsKey(zoneCodeWard)) { bCASKS013DataRecord = (CASKS013DataRecord) result.get(zoneCodeWard); result.remove(zoneCodeWard); result.put(zoneCodeWard, aCASKS013DataRecord.aggregate(bCASKS013DataRecord)); } else { result.put(zoneCodeWard, aCASKS013DataRecord); } } write(aRandomAccessFile, result); } /** * Aggregates CASKS013DataRecords from OA To MSOA for the OA * records in the range [startRecordID,endRecordID] and writes the results * to aRandomAccessFile * * @param aRandomAccessFile * RandomAccessFile to which results are written * @param startRecordID * The first OA RecordID in the sequence to be aggregated. * @param endRecordID * The last OA RecordID in the sequence to be aggregated. */ public void aggregateOAToMSOA(RandomAccessFile aRandomAccessFile, long startRecordID, long endRecordID) throws IOException { TreeMap result = new TreeMap(); HashMap lookUpMSOAfromOAHashMap = get_LookUpMSOAfromOAHashMap(); CASKS013DataRecord aCASKS013DataRecord; CASKS013DataRecord bCASKS013DataRecord; String zoneCode; Object zoneCodeMSOA; // long newRecordID = startRecordIDForLeeds - 1L; long newRecordID = -1L; for (long RecordID = startRecordID; RecordID < endRecordID; RecordID++) { aCASKS013DataRecord = (CASKS013DataRecord) getDataRecord(RecordID); zoneCode = new String(aCASKS013DataRecord.getZone_Code()); zoneCodeMSOA = lookUpMSOAfromOAHashMap.get(zoneCode); if (result.containsKey(zoneCodeMSOA)) { bCASKS013DataRecord = (CASKS013DataRecord) result.get(zoneCodeMSOA); result.remove(zoneCodeMSOA); result.put(zoneCodeMSOA, aCASKS013DataRecord.aggregate(bCASKS013DataRecord)); } else { result.put(zoneCodeMSOA, aCASKS013DataRecord); } } write(aRandomAccessFile, result); } }